diff --git contrib/src/test/results/clientnegative/serde_regex.q.out contrib/src/test/results/clientnegative/serde_regex.q.out index 4edc297..580b623 100644 --- contrib/src/test/results/clientnegative/serde_regex.q.out +++ contrib/src/test/results/clientnegative/serde_regex.q.out @@ -40,9 +40,6 @@ WITH SERDEPROPERTIES ( ) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME serde_regex) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL host TOK_STRING) (TOK_TABCOL identity TOK_STRING) (TOK_TABCOL user TOK_STRING) (TOK_TABCOL time TOK_STRING) (TOK_TABCOL request TOK_STRING) (TOK_TABCOL status TOK_INT) (TOK_TABCOL size TOK_INT) (TOK_TABCOL referer TOK_STRING) (TOK_TABCOL agent TOK_STRING)) (TOK_TABLESERIALIZER (TOK_SERDENAME 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' (TOK_TABLEPROPERTIES (TOK_TABLEPROPLIST (TOK_TABLEPROPERTY "input.regex" "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\"))?") (TOK_TABLEPROPERTY "output.format.string" "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s"))))) TOK_TBLTEXTFILE) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -51,16 +48,13 @@ STAGE PLANS: Create Table Operator: Create Table columns: host string, identity string, user string, time string, request string, status int, size int, referer string, agent string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat serde name: org.apache.hadoop.hive.contrib.serde2.RegexSerDe serde properties: input.regex ([^ ]*) ([^ ]*) ([^ ]*) (-|\[[^\]]*\]) ([^ "]*|"[^"]*") (-|[0-9]*) (-|[0-9]*)(?: ([^ "]*|"[^"]*") ([^ "]*|"[^"]*"))? output.format.string %1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s name: serde_regex - isExternal: false PREHOOK: query: CREATE TABLE serde_regex( host STRING, diff --git contrib/src/test/results/clientpositive/dboutput.q.out contrib/src/test/results/clientpositive/dboutput.q.out index bbe82bf..8ad0e8c 100644 --- contrib/src/test/results/clientpositive/dboutput.q.out +++ contrib/src/test/results/clientpositive/dboutput.q.out @@ -44,9 +44,6 @@ dboutput('jdbc:derby:../build/test_dboutput_db','','', limit 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION dboutput 'jdbc:derby:../build/test_dboutput_db;create=true' '' '' 'CREATE TABLE app_info ( kkey VARCHAR(255) NOT NULL, vvalue VARCHAR(255) NOT NULL, UNIQUE(kkey))')) (TOK_SELEXPR (TOK_FUNCTION dboutput 'jdbc:derby:../build/test_dboutput_db' '' '' 'INSERT INTO app_info (kkey,vvalue) VALUES (?,?)' '20' 'a')) (TOK_SELEXPR (TOK_FUNCTION dboutput 'jdbc:derby:../build/test_dboutput_db' '' '' 'INSERT INTO app_info (kkey,vvalue) VALUES (?,?)' '20' 'b'))) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -54,23 +51,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: dboutput('jdbc:derby:../build/test_dboutput_db;create=true','','','CREATE TABLE app_info ( kkey VARCHAR(255) NOT NULL, vvalue VARCHAR(255) NOT NULL, UNIQUE(kkey))') - type: int - expr: dboutput('jdbc:derby:../build/test_dboutput_db','','','INSERT INTO app_info (kkey,vvalue) VALUES (?,?)','20','a') - type: int - expr: dboutput('jdbc:derby:../build/test_dboutput_db','','','INSERT INTO app_info (kkey,vvalue) VALUES (?,?)','20','b') - type: int + expressions: dboutput('jdbc:derby:../build/test_dboutput_db;create=true','','','CREATE TABLE app_info ( kkey VARCHAR(255) NOT NULL, vvalue VARCHAR(255) NOT NULL, UNIQUE(kkey))') (type: int), dboutput('jdbc:derby:../build/test_dboutput_db','','','INSERT INTO app_info (kkey,vvalue) VALUES (?,?)','20','a') (type: int), dboutput('jdbc:derby:../build/test_dboutput_db','','','INSERT INTO app_info (kkey,vvalue) VALUES (?,?)','20','b') (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -125,9 +119,6 @@ dboutput('jdbc:derby:../build/test_dboutput_db','','', FROM src WHERE key < 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION dboutput 'jdbc:derby:../build/test_dboutput_db' '' '' 'INSERT INTO app_info (kkey,vvalue) VALUES (?,?)' (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -135,22 +126,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: dboutput('jdbc:derby:../build/test_dboutput_db','','','INSERT INTO app_info (kkey,vvalue) VALUES (?,?)',key,value) - type: int + expressions: dboutput('jdbc:derby:../build/test_dboutput_db','','','INSERT INTO app_info (kkey,vvalue) VALUES (?,?)',key,value) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git contrib/src/test/results/clientpositive/fileformat_base64.q.out contrib/src/test/results/clientpositive/fileformat_base64.q.out index ede91e6..058a306 100644 --- contrib/src/test/results/clientpositive/fileformat_base64.q.out +++ contrib/src/test/results/clientpositive/fileformat_base64.q.out @@ -12,9 +12,6 @@ CREATE TABLE base64_test(key INT, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextOutputFormat' POSTHOOK: type: CREATETABLE -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME base64_test) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL key TOK_INT) (TOK_TABCOL value TOK_STRING)) (TOK_TABLEFILEFORMAT 'org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextInputFormat' 'org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextOutputFormat')) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -23,12 +20,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: key int, value string - if not exists: false input format: org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextOutputFormat name: base64_test - isExternal: false PREHOOK: query: CREATE TABLE base64_test(key INT, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextInputFormat' diff --git contrib/src/test/results/clientpositive/serde_regex.q.out contrib/src/test/results/clientpositive/serde_regex.q.out index 815011f..fdbbabc 100644 --- contrib/src/test/results/clientpositive/serde_regex.q.out +++ contrib/src/test/results/clientpositive/serde_regex.q.out @@ -34,9 +34,6 @@ WITH SERDEPROPERTIES ( ) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME serde_regex) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL host TOK_STRING) (TOK_TABCOL identity TOK_STRING) (TOK_TABCOL user TOK_STRING) (TOK_TABCOL time TOK_STRING) (TOK_TABCOL request TOK_STRING) (TOK_TABCOL status TOK_STRING) (TOK_TABCOL size TOK_STRING) (TOK_TABCOL referer TOK_STRING) (TOK_TABCOL agent TOK_STRING)) (TOK_TABLESERIALIZER (TOK_SERDENAME 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' (TOK_TABLEPROPERTIES (TOK_TABLEPROPLIST (TOK_TABLEPROPERTY "input.regex" "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\"))?") (TOK_TABLEPROPERTY "output.format.string" "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s"))))) TOK_TBLTEXTFILE) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -45,16 +42,13 @@ STAGE PLANS: Create Table Operator: Create Table columns: host string, identity string, user string, time string, request string, status string, size string, referer string, agent string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat serde name: org.apache.hadoop.hive.contrib.serde2.RegexSerDe serde properties: input.regex ([^ ]*) ([^ ]*) ([^ ]*) (-|\[[^\]]*\]) ([^ "]*|"[^"]*") (-|[0-9]*) (-|[0-9]*)(?: ([^ "]*|"[^"]*") ([^ "]*|"[^"]*"))? output.format.string %1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s name: serde_regex - isExternal: false PREHOOK: query: CREATE TABLE serde_regex( host STRING, diff --git contrib/src/test/results/clientpositive/serde_typedbytes.q.out contrib/src/test/results/clientpositive/serde_typedbytes.q.out index 7bb6d83..de62b52 100644 --- contrib/src/test/results/clientpositive/serde_typedbytes.q.out +++ contrib/src/test/results/clientpositive/serde_typedbytes.q.out @@ -29,9 +29,6 @@ FROM ( ) tmap INSERT OVERWRITE TABLE dest1 SELECT tkey, tvalue POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value)) (TOK_SERDE (TOK_SERDENAME 'org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe')) (TOK_RECORDWRITER 'org.apache.hadoop.hive.contrib.util.typedbytes.TypedBytesRecordWriter') 'cat' (TOK_SERDE (TOK_SERDENAME 'org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe')) (TOK_RECORDREADER 'org.apache.hadoop.hive.contrib.util.typedbytes.TypedBytesRecordReader') (TOK_ALIASLIST tkey tvalue)))))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL tkey)) (TOK_SELEXPR (TOK_TABLE_OR_COL tvalue))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -45,33 +42,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -102,12 +94,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -116,12 +106,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git contrib/src/test/results/clientpositive/serde_typedbytes2.q.out contrib/src/test/results/clientpositive/serde_typedbytes2.q.out index 87c6342..a72a99c 100644 --- contrib/src/test/results/clientpositive/serde_typedbytes2.q.out +++ contrib/src/test/results/clientpositive/serde_typedbytes2.q.out @@ -29,9 +29,6 @@ FROM ( ) tmap INSERT OVERWRITE TABLE dest1 SELECT tkey, tvalue POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_FUNCTION TOK_SMALLINT (. (TOK_TABLE_OR_COL src) key)) (. (TOK_TABLE_OR_COL src) value)) (TOK_SERDE (TOK_SERDENAME 'org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe')) (TOK_RECORDWRITER 'org.apache.hadoop.hive.contrib.util.typedbytes.TypedBytesRecordWriter') 'cat' (TOK_SERDE (TOK_SERDENAME 'org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe')) (TOK_RECORDREADER 'org.apache.hadoop.hive.contrib.util.typedbytes.TypedBytesRecordReader') (TOK_TABCOLLIST (TOK_TABCOL tkey TOK_SMALLINT) (TOK_TABCOL tvalue TOK_STRING))))))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL tkey)) (TOK_SELEXPR (TOK_TABLE_OR_COL tvalue))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -45,33 +42,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToShort(key) - type: smallint - expr: value - type: string + expressions: UDFToShort(key) (type: smallint), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: smallint - expr: _col1 - type: string + expressions: _col0 (type: smallint), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -102,12 +94,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -116,12 +106,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git contrib/src/test/results/clientpositive/serde_typedbytes3.q.out contrib/src/test/results/clientpositive/serde_typedbytes3.q.out index f0797cd..f7f3efe 100644 --- contrib/src/test/results/clientpositive/serde_typedbytes3.q.out +++ contrib/src/test/results/clientpositive/serde_typedbytes3.q.out @@ -29,9 +29,6 @@ FROM ( ) tmap INSERT OVERWRITE TABLE dest1 SELECT tkey, tvalue POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_FUNCTION TOK_SMALLINT (. (TOK_TABLE_OR_COL src) key)) (. (TOK_TABLE_OR_COL src) value)) (TOK_SERDE (TOK_SERDENAME 'org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe')) (TOK_RECORDWRITER 'org.apache.hadoop.hive.contrib.util.typedbytes.TypedBytesRecordWriter') 'cat' (TOK_SERDE (TOK_SERDENAME 'org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe')) (TOK_RECORDREADER 'org.apache.hadoop.hive.contrib.util.typedbytes.TypedBytesRecordReader') (TOK_ALIASLIST tkey tvalue)))))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL tkey)) (TOK_SELEXPR (TOK_TABLE_OR_COL tvalue))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -45,33 +42,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToShort(key) - type: smallint - expr: value - type: string + expressions: UDFToShort(key) (type: smallint), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -102,12 +94,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -116,12 +106,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git contrib/src/test/results/clientpositive/serde_typedbytes4.q.out contrib/src/test/results/clientpositive/serde_typedbytes4.q.out index 7bd9b7d..7a942da 100644 --- contrib/src/test/results/clientpositive/serde_typedbytes4.q.out +++ contrib/src/test/results/clientpositive/serde_typedbytes4.q.out @@ -31,9 +31,6 @@ FROM ( ) tmap INSERT OVERWRITE TABLE dest1 SELECT tkey, tvalue ORDER by tkey, tvalue POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_FUNCTION TOK_TINYINT (. (TOK_TABLE_OR_COL src) key)) (. (TOK_TABLE_OR_COL src) value)) (TOK_SERDE (TOK_SERDENAME 'org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe')) (TOK_RECORDWRITER 'org.apache.hadoop.hive.contrib.util.typedbytes.TypedBytesRecordWriter') 'cat' (TOK_SERDE (TOK_SERDENAME 'org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe')) (TOK_RECORDREADER 'org.apache.hadoop.hive.contrib.util.typedbytes.TypedBytesRecordReader') (TOK_ALIASLIST tkey tvalue)))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 100)))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL tkey)) (TOK_SELEXPR (TOK_TABLE_OR_COL tvalue))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL tkey)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL tvalue))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -42,52 +39,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToByte(key) - type: tinyint - expr: value - type: string + expressions: UDFToByte(key) (type: tinyint), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git contrib/src/test/results/clientpositive/serde_typedbytes5.q.out contrib/src/test/results/clientpositive/serde_typedbytes5.q.out index 5015077..1066300 100644 --- contrib/src/test/results/clientpositive/serde_typedbytes5.q.out +++ contrib/src/test/results/clientpositive/serde_typedbytes5.q.out @@ -29,9 +29,6 @@ FROM ( ) tmap INSERT OVERWRITE TABLE dest1 SELECT tkey, tvalue POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value)) (TOK_SERDE (TOK_SERDENAME 'org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe')) (TOK_RECORDWRITER 'org.apache.hadoop.hive.contrib.util.typedbytes.TypedBytesRecordWriter') 'python ../../data/scripts/cat.py' (TOK_SERDE (TOK_SERDENAME 'org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe')) (TOK_RECORDREADER 'org.apache.hadoop.hive.contrib.util.typedbytes.TypedBytesRecordReader') (TOK_ALIASLIST tkey tvalue)))))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL tkey)) (TOK_SELEXPR (TOK_TABLE_OR_COL tvalue))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -45,33 +42,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: python ../../data/scripts/cat.py output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -102,12 +94,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -116,12 +106,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git contrib/src/test/results/clientpositive/udaf_example_avg.q.out contrib/src/test/results/clientpositive/udaf_example_avg.q.out index bc9bbfb..316c23e 100644 --- contrib/src/test/results/clientpositive/udaf_example_avg.q.out +++ contrib/src/test/results/clientpositive/udaf_example_avg.q.out @@ -12,9 +12,6 @@ SELECT example_avg(substr(value,5)), example_avg(IF(substr(value,5) > 250, NULL, substr(value,5))) FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION example_avg (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5))) (TOK_SELEXPR (TOK_FUNCTION example_avg (TOK_FUNCTION IF (> (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5) 250) TOK_NULL (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -22,48 +19,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: example_avg(substr(value, 5)) - expr: example_avg(if((substr(value, 5) > 250), null, substr(value, 5))) - bucketGroup: false + aggregations: example_avg(substr(value, 5)), example_avg(if((substr(value, 5) > 250), null, substr(value, 5))) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct - expr: _col1 - type: struct + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: example_avg(VALUE._col0) - expr: example_avg(VALUE._col1) - bucketGroup: false + aggregations: example_avg(VALUE._col0), example_avg(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: double + expressions: _col0 (type: double), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out index 6723922..c1cc25e 100644 --- contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out +++ contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out @@ -12,9 +12,6 @@ SELECT substr(value,5,1), example_group_concat("(", key, ":", value, ")") FROM src GROUP BY substr(value,5,1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5 1)) (TOK_SELEXPR (TOK_FUNCTION example_group_concat "(" (TOK_TABLE_OR_COL key) ":" (TOK_TABLE_OR_COL value) ")"))) (TOK_GROUPBY (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5 1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -22,58 +19,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: example_group_concat('(', key, ':', value, ')') - bucketGroup: false - keys: - expr: substr(value, 5, 1) - type: string + aggregations: example_group_concat('(', key, ':', value, ')') + keys: substr(value, 5, 1) (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: array + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: array) Reduce Operator Tree: Group By Operator - aggregations: - expr: example_group_concat(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: example_group_concat(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git contrib/src/test/results/clientpositive/udaf_example_max.q.out contrib/src/test/results/clientpositive/udaf_example_max.q.out index 99e18ab..096e1d3 100644 --- contrib/src/test/results/clientpositive/udaf_example_max.q.out +++ contrib/src/test/results/clientpositive/udaf_example_max.q.out @@ -17,9 +17,6 @@ SELECT example_max(substr(value,5)), example_max(IF(substr(value,5) > 250, NULL, substr(value,5))) FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION example_max (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5))) (TOK_SELEXPR (TOK_FUNCTION example_max (TOK_FUNCTION IF (> (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5) 250) TOK_NULL (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -27,48 +24,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: example_max(substr(value, 5)) - expr: example_max(if((substr(value, 5) > 250), null, substr(value, 5))) - bucketGroup: false + aggregations: example_max(substr(value, 5)), example_max(if((substr(value, 5) > 250), null, substr(value, 5))) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: example_max(VALUE._col0) - expr: example_max(VALUE._col1) - bucketGroup: false + aggregations: example_max(VALUE._col0), example_max(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git contrib/src/test/results/clientpositive/udaf_example_max_n.q.out contrib/src/test/results/clientpositive/udaf_example_max_n.q.out index ed73822..d779d2e 100644 --- contrib/src/test/results/clientpositive/udaf_example_max_n.q.out +++ contrib/src/test/results/clientpositive/udaf_example_max_n.q.out @@ -12,9 +12,6 @@ SELECT example_max_n(substr(value,5),10), example_max_n(IF(substr(value,5) > 250, NULL, substr(value,5)),10) FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION example_max_n (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5) 10)) (TOK_SELEXPR (TOK_FUNCTION example_max_n (TOK_FUNCTION IF (> (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5) 250) TOK_NULL (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5)) 10))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -22,48 +19,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: example_max_n(substr(value, 5), 10) - expr: example_max_n(if((substr(value, 5) > 250), null, substr(value, 5)), 10) - bucketGroup: false + aggregations: example_max_n(substr(value, 5), 10), example_max_n(if((substr(value, 5) > 250), null, substr(value, 5)), 10) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct,n:int> - expr: _col1 - type: struct,n:int> + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct,n:int>), _col1 (type: struct,n:int>) Reduce Operator Tree: Group By Operator - aggregations: - expr: example_max_n(VALUE._col0) - expr: example_max_n(VALUE._col1) - bucketGroup: false + aggregations: example_max_n(VALUE._col0), example_max_n(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: _col0 - type: array - expr: _col1 - type: array + expressions: _col0 (type: array), _col1 (type: array) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git contrib/src/test/results/clientpositive/udaf_example_min.q.out contrib/src/test/results/clientpositive/udaf_example_min.q.out index 04bbb84..620afbd 100644 --- contrib/src/test/results/clientpositive/udaf_example_min.q.out +++ contrib/src/test/results/clientpositive/udaf_example_min.q.out @@ -17,9 +17,6 @@ SELECT example_min(substr(value,5)), example_min(IF(substr(value,5) > 250, NULL, substr(value,5))) FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION example_min (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5))) (TOK_SELEXPR (TOK_FUNCTION example_min (TOK_FUNCTION IF (> (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5) 250) TOK_NULL (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -27,48 +24,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: example_min(substr(value, 5)) - expr: example_min(if((substr(value, 5) > 250), null, substr(value, 5))) - bucketGroup: false + aggregations: example_min(substr(value, 5)), example_min(if((substr(value, 5) > 250), null, substr(value, 5))) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: example_min(VALUE._col0) - expr: example_min(VALUE._col1) - bucketGroup: false + aggregations: example_min(VALUE._col0), example_min(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git contrib/src/test/results/clientpositive/udaf_example_min_n.q.out contrib/src/test/results/clientpositive/udaf_example_min_n.q.out index 94ee89d..78556e3 100644 --- contrib/src/test/results/clientpositive/udaf_example_min_n.q.out +++ contrib/src/test/results/clientpositive/udaf_example_min_n.q.out @@ -12,9 +12,6 @@ SELECT example_min_n(substr(value,5),10), example_min_n(IF(substr(value,5) < 250, NULL, substr(value,5)),10) FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION example_min_n (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5) 10)) (TOK_SELEXPR (TOK_FUNCTION example_min_n (TOK_FUNCTION IF (< (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5) 250) TOK_NULL (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5)) 10))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -22,48 +19,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: example_min_n(substr(value, 5), 10) - expr: example_min_n(if((substr(value, 5) < 250), null, substr(value, 5)), 10) - bucketGroup: false + aggregations: example_min_n(substr(value, 5), 10), example_min_n(if((substr(value, 5) < 250), null, substr(value, 5)), 10) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct,n:int> - expr: _col1 - type: struct,n:int> + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct,n:int>), _col1 (type: struct,n:int>) Reduce Operator Tree: Group By Operator - aggregations: - expr: example_min_n(VALUE._col0) - expr: example_min_n(VALUE._col1) - bucketGroup: false + aggregations: example_min_n(VALUE._col0), example_min_n(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: _col0 - type: array - expr: _col1 - type: array + expressions: _col0 (type: array), _col1 (type: array) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git contrib/src/test/results/clientpositive/udf_example_add.q.out contrib/src/test/results/clientpositive/udf_example_add.q.out index 7031dea..8c2f519 100644 --- contrib/src/test/results/clientpositive/udf_example_add.q.out +++ contrib/src/test/results/clientpositive/udf_example_add.q.out @@ -22,9 +22,6 @@ SELECT example_add(1, 2), example_add(1, 2, 3, 4.4) FROM src LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION example_add 1 2)) (TOK_SELEXPR (TOK_FUNCTION example_add 1 2 3)) (TOK_SELEXPR (TOK_FUNCTION example_add 1 2 3 4)) (TOK_SELEXPR (TOK_FUNCTION example_add 1.1 2.2)) (TOK_SELEXPR (TOK_FUNCTION example_add 1.1 2.2 3.3)) (TOK_SELEXPR (TOK_FUNCTION example_add 1.1 2.2 3.3 4.4)) (TOK_SELEXPR (TOK_FUNCTION example_add 1 2 3 4.4))) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -32,31 +29,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: example_add(1, 2) - type: int - expr: example_add(1, 2, 3) - type: int - expr: example_add(1, 2, 3, 4) - type: int - expr: example_add(1.1, 2.2) - type: double - expr: example_add(1.1, 2.2, 3.3) - type: double - expr: example_add(1.1, 2.2, 3.3, 4.4) - type: double - expr: example_add(1, 2, 3, 4.4) - type: double + expressions: example_add(1, 2) (type: int), example_add(1, 2, 3) (type: int), example_add(1, 2, 3, 4) (type: int), example_add(1.1, 2.2) (type: double), example_add(1.1, 2.2, 3.3) (type: double), example_add(1.1, 2.2, 3.3, 4.4) (type: double), example_add(1, 2, 3, 4.4) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git contrib/src/test/results/clientpositive/udf_example_arraymapstruct.q.out contrib/src/test/results/clientpositive/udf_example_arraymapstruct.q.out index 147dcf9..43f31df 100644 --- contrib/src/test/results/clientpositive/udf_example_arraymapstruct.q.out +++ contrib/src/test/results/clientpositive/udf_example_arraymapstruct.q.out @@ -18,9 +18,6 @@ POSTHOOK: query: EXPLAIN SELECT example_arraysum(lint), example_mapconcat(mstringstring), example_structprint(lintstring[0]) FROM src_thrift POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION example_arraysum (TOK_TABLE_OR_COL lint))) (TOK_SELEXPR (TOK_FUNCTION example_mapconcat (TOK_TABLE_OR_COL mstringstring))) (TOK_SELEXPR (TOK_FUNCTION example_structprint ([ (TOK_TABLE_OR_COL lintstring) 0)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -28,22 +25,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_thrift + Map Operator Tree: TableScan alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: example_arraysum(lint) - type: double - expr: example_mapconcat(mstringstring) - type: string - expr: example_structprint(lintstring[0]) - type: string + expressions: example_arraysum(lint) (type: double), example_mapconcat(mstringstring) (type: string), example_structprint(lintstring[0]) (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git contrib/src/test/results/clientpositive/udf_example_format.q.out contrib/src/test/results/clientpositive/udf_example_format.q.out index e69e231..99255f0 100644 --- contrib/src/test/results/clientpositive/udf_example_format.q.out +++ contrib/src/test/results/clientpositive/udf_example_format.q.out @@ -16,9 +16,6 @@ SELECT example_format("abc"), example_format("%1$x %2$o %3$d", 10, 10, 10) FROM src LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION example_format "abc")) (TOK_SELEXPR (TOK_FUNCTION example_format "%1$s" 1.1)) (TOK_SELEXPR (TOK_FUNCTION example_format "%1$s %2$e" 1.1 1.2)) (TOK_SELEXPR (TOK_FUNCTION example_format "%1$x %2$o %3$d" 10 10 10))) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -26,25 +23,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: example_format('abc') - type: string - expr: example_format('%1$s', 1.1) - type: string - expr: example_format('%1$s %2$e', 1.1, 1.2) - type: string - expr: example_format('%1$x %2$o %3$d', 10, 10, 10) - type: string + expressions: example_format('abc') (type: string), example_format('%1$s', 1.1) (type: string), example_format('%1$s %2$e', 1.1, 1.2) (type: string), example_format('%1$x %2$o %3$d', 10, 10, 10) (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git contrib/src/test/results/clientpositive/udf_row_sequence.q.out contrib/src/test/results/clientpositive/udf_row_sequence.q.out index 79fb28a..7573ec5 100644 --- contrib/src/test/results/clientpositive/udf_row_sequence.q.out +++ contrib/src/test/results/clientpositive/udf_row_sequence.q.out @@ -31,9 +31,6 @@ select key, row_sequence() as r from (select key from src order by key) x order by r POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION row_sequence) r)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL r))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -42,36 +39,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: row_sequence() - type: bigint + expressions: _col0 (type: string), row_sequence() (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -79,25 +68,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: bigint + key expressions: _col1 (type: bigint) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git hbase-handler/src/test/results/positive/external_table_ppd.q.out hbase-handler/src/test/results/positive/external_table_ppd.q.out index 1a8f06c..2755525 100644 --- hbase-handler/src/test/results/positive/external_table_ppd.q.out +++ hbase-handler/src/test/results/positive/external_table_ppd.q.out @@ -114,9 +114,6 @@ PREHOOK: query: explain SELECT * FROM t_hbase where int_col > 0 PREHOOK: type: QUERY POSTHOOK: query: explain SELECT * FROM t_hbase where int_col > 0 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t_hbase))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL int_col) 0)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -124,39 +121,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t_hbase + Map Operator Tree: TableScan alias: t_hbase filterExpr: - expr: (int_col > 0) - type: boolean + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (int_col > 0) - type: boolean + predicate: (int_col > 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: tinyint_col - type: tinyint - expr: smallint_col - type: smallint - expr: int_col - type: int - expr: bigint_col - type: bigint - expr: float_col - type: float - expr: double_col - type: double - expr: boolean_col - type: boolean + expressions: key (type: string), tinyint_col (type: tinyint), smallint_col (type: smallint), int_col (type: int), bigint_col (type: bigint), float_col (type: float), double_col (type: double), boolean_col (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out index 952e2c2..51acf28 100644 --- hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out +++ hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out @@ -25,9 +25,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- with full pushdown explain select * from hbase_pushdown where key>'90' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL key) '90')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -35,27 +32,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_pushdown + Map Operator Tree: TableScan alias: hbase_pushdown filterExpr: - expr: (key > '90') - type: boolean + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (key > '90') - type: boolean + predicate: (key > '90') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -188,9 +179,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- with cnostant expressinon explain select * from hbase_pushdown where key>=cast(40 + 50 as string) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (>= (TOK_TABLE_OR_COL key) (TOK_FUNCTION TOK_STRING (+ 40 50)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -198,27 +186,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_pushdown + Map Operator Tree: TableScan alias: hbase_pushdown filterExpr: - expr: (key >= UDFToString((40 + 50))) - type: boolean + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (key >= UDFToString((40 + 50))) - type: boolean + predicate: (key >= UDFToString((40 + 50))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -250,9 +232,6 @@ POSTHOOK: query: -- with partial pushdown explain select * from hbase_pushdown where key>'90' and value like '%9%' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) '90') (like (TOK_TABLE_OR_COL value) '%9%'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -260,27 +239,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_pushdown + Map Operator Tree: TableScan alias: hbase_pushdown filterExpr: - expr: (key > '90') - type: boolean + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (value like '%9%') - type: boolean + predicate: (value like '%9%') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -313,9 +286,6 @@ POSTHOOK: query: -- with two residuals explain select * from hbase_pushdown where key>='90' and value like '%9%' and key=cast(value as int) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (>= (TOK_TABLE_OR_COL key) '90') (like (TOK_TABLE_OR_COL value) '%9%')) (= (TOK_TABLE_OR_COL key) (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL value))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -323,27 +293,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_pushdown + Map Operator Tree: TableScan alias: hbase_pushdown filterExpr: - expr: (key >= '90') - type: boolean + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: ((value like '%9%') and (key = UDFToInteger(value))) - type: boolean + predicate: ((value like '%9%') and (key = UDFToInteger(value))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -373,9 +337,6 @@ POSTHOOK: query: -- with contradictory pushdowns explain select * from hbase_pushdown where key<'80' and key>'90' and value like '%90%' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (< (TOK_TABLE_OR_COL key) '80') (> (TOK_TABLE_OR_COL key) '90')) (like (TOK_TABLE_OR_COL value) '%90%'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -383,27 +344,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_pushdown + Map Operator Tree: TableScan alias: hbase_pushdown filterExpr: - expr: ((key < '80') and (key > '90')) - type: boolean + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (value like '%90%') - type: boolean + predicate: (value like '%90%') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -431,9 +386,6 @@ POSTHOOK: query: -- with nothing to push down explain select * from hbase_pushdown POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -444,13 +396,11 @@ STAGE PLANS: Processor Tree: TableScan alias: hbase_pushdown + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE ListSink PREHOOK: query: -- with a predicate which is not actually part of the filter, so @@ -465,9 +415,6 @@ POSTHOOK: query: -- with a predicate which is not actually part of the filter, s explain select * from hbase_pushdown where (case when key<'90' then 2 else 4 end) > 3 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_FUNCTION when (< (TOK_TABLE_OR_COL key) '90') 2 4) 3)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -475,24 +422,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_pushdown + Map Operator Tree: TableScan alias: hbase_pushdown + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (CASE WHEN ((key < '90')) THEN (2) ELSE (4) END > 3) - type: boolean + predicate: (CASE WHEN ((key < '90')) THEN (2) ELSE (4) END > 3) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -514,9 +457,6 @@ POSTHOOK: query: -- with a predicate which is under an OR, so it should explain select * from hbase_pushdown where key<='80' or value like '%90%' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (<= (TOK_TABLE_OR_COL key) '80') (like (TOK_TABLE_OR_COL value) '%90%'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -524,24 +464,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_pushdown + Map Operator Tree: TableScan alias: hbase_pushdown + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: ((key <= '80') or (value like '%90%')) - type: boolean + predicate: ((key <= '80') or (value like '%90%')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -559,9 +495,6 @@ POSTHOOK: query: -- following will get pushed into hbase after HIVE-2819 explain select * from hbase_pushdown where key > '281' and key < '287' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) '281') (< (TOK_TABLE_OR_COL key) '287'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -569,27 +502,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_pushdown + Map Operator Tree: TableScan alias: hbase_pushdown filterExpr: - expr: ((key > '281') and (key < '287')) - type: boolean + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: ((key > '281') and (key < '287')) - type: boolean + predicate: ((key > '281') and (key < '287')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -622,9 +549,6 @@ POSTHOOK: query: -- with pushdown disabled explain select * from hbase_pushdown where key<='90' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (<= (TOK_TABLE_OR_COL key) '90')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -632,24 +556,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_pushdown + Map Operator Tree: TableScan alias: hbase_pushdown + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (key <= '90') - type: boolean + predicate: (key <= '90') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git hbase-handler/src/test/results/positive/hbase_pushdown.q.out hbase-handler/src/test/results/positive/hbase_pushdown.q.out index e3863d8..14ae7ca 100644 --- hbase-handler/src/test/results/positive/hbase_pushdown.q.out +++ hbase-handler/src/test/results/positive/hbase_pushdown.q.out @@ -25,9 +25,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- with full pushdown explain select * from hbase_pushdown where key=90 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 90)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -35,27 +32,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_pushdown + Map Operator Tree: TableScan alias: hbase_pushdown filterExpr: - expr: (key = 90) - type: boolean + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (key = 90) - type: boolean + predicate: (key = 90) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -82,9 +73,6 @@ POSTHOOK: query: -- with partial pushdown explain select * from hbase_pushdown where key=90 and value like '%90%' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL key) 90) (like (TOK_TABLE_OR_COL value) '%90%'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -92,27 +80,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_pushdown + Map Operator Tree: TableScan alias: hbase_pushdown filterExpr: - expr: (key = 90) - type: boolean + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (value like '%90%') - type: boolean + predicate: (value like '%90%') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -141,9 +123,6 @@ POSTHOOK: query: -- with two residuals explain select * from hbase_pushdown where key=90 and value like '%90%' and key=cast(value as int) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL key) 90) (like (TOK_TABLE_OR_COL value) '%90%')) (= (TOK_TABLE_OR_COL key) (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL value))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -151,27 +130,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_pushdown + Map Operator Tree: TableScan alias: hbase_pushdown filterExpr: - expr: (key = 90) - type: boolean + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: ((value like '%90%') and (key = UDFToInteger(value))) - type: boolean + predicate: ((value like '%90%') and (key = UDFToInteger(value))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -191,9 +164,6 @@ POSTHOOK: query: -- with contradictory pushdowns explain select * from hbase_pushdown where key=80 and key=90 and value like '%90%' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL key) 80) (= (TOK_TABLE_OR_COL key) 90)) (like (TOK_TABLE_OR_COL value) '%90%'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -201,24 +171,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_pushdown + Map Operator Tree: TableScan alias: hbase_pushdown + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (((key = 80) and (key = 90)) and (value like '%90%')) - type: boolean + predicate: (((key = 80) and (key = 90)) and (value like '%90%')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -246,9 +212,6 @@ POSTHOOK: query: -- with nothing to push down explain select * from hbase_pushdown POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -259,13 +222,11 @@ STAGE PLANS: Processor Tree: TableScan alias: hbase_pushdown + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE ListSink PREHOOK: query: -- with a predicate which is not actually part of the filter, so @@ -280,9 +241,6 @@ POSTHOOK: query: -- with a predicate which is not actually part of the filter, s explain select * from hbase_pushdown where (case when key=90 then 2 else 4 end) > 3 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_FUNCTION when (= (TOK_TABLE_OR_COL key) 90) 2 4) 3)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -290,24 +248,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_pushdown + Map Operator Tree: TableScan alias: hbase_pushdown + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (CASE WHEN ((key = 90)) THEN (2) ELSE (4) END > 3) - type: boolean + predicate: (CASE WHEN ((key = 90)) THEN (2) ELSE (4) END > 3) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -329,9 +283,6 @@ POSTHOOK: query: -- with a predicate which is under an OR, so it should explain select * from hbase_pushdown where key=80 or value like '%90%' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (= (TOK_TABLE_OR_COL key) 80) (like (TOK_TABLE_OR_COL value) '%90%'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -339,24 +290,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_pushdown + Map Operator Tree: TableScan alias: hbase_pushdown + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: ((key = 80) or (value like '%90%')) - type: boolean + predicate: ((key = 80) or (value like '%90%')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -374,9 +321,6 @@ POSTHOOK: query: -- with pushdown disabled explain select * from hbase_pushdown where key=90 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 90)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -384,24 +328,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_pushdown + Map Operator Tree: TableScan alias: hbase_pushdown + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (key = 90) - type: boolean + predicate: (key = 90) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git hbase-handler/src/test/results/positive/hbase_queries.q.out hbase-handler/src/test/results/positive/hbase_queries.q.out index 2768755..945f5f0 100644 --- hbase-handler/src/test/results/positive/hbase_queries.q.out +++ hbase-handler/src/test/results/positive/hbase_queries.q.out @@ -33,33 +33,26 @@ PREHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE hbase_table_1 SELECT * W PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE hbase_table_1 SELECT * WHERE (key%2)=0 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME hbase_table_1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (% (TOK_TABLE_OR_COL key) 2) 0)))) - STAGE DEPENDENCIES: Stage-0 is a root stage STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key % 2) = 0) - type: boolean + predicate: ((key % 2) = 0) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string + expressions: UDFToInteger(key) (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat @@ -107,9 +100,6 @@ JOIN ON (x.key = Y.key) ORDER BY key, value LIMIT 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_table_1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME hbase_table_1)))))) x) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))))) Y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL Y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME Y)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -118,48 +108,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x:hbase_table_1 + Map Operator Tree: TableScan alias: hbase_table_1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(_col0) - type: double + key expressions: UDFToDouble(_col0) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(_col0) - type: double - tag: 0 - y:src + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(_col0) - type: double + key expressions: UDFToDouble(_col0) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(_col0) - type: double - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -167,18 +141,14 @@ STAGE PLANS: condition expressions: 0 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -186,28 +156,22 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 4120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 4120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -277,9 +241,6 @@ JOIN ON (x.key = Y.key) ORDER BY key, value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_table_1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME hbase_table_1)))) (TOK_WHERE (< 100 (. (TOK_TABLE_OR_COL hbase_table_1) key))))) x) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_table_2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME hbase_table_2)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL hbase_table_2) key) 120)))) Y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL Y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME Y)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -288,56 +249,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x:hbase_table_1 + Map Operator Tree: TableScan - alias: hbase_table_1 + alias: hbase_table_2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (100 < key) - type: boolean + predicate: (key < 120) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - y:hbase_table_2 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) TableScan - alias: hbase_table_2 + alias: hbase_table_1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (key < 120) - type: boolean + predicate: (100 < key) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - outputColumnNames: _col0, _col1 + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -345,18 +288,14 @@ STAGE PLANS: condition expressions: 0 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: int - expr: _col3 - type: string + expressions: _col2 (type: int), _col3 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -364,27 +303,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -521,9 +452,6 @@ JOIN (SELECT src.key, count(src.key) as count FROM src GROUP BY src.key) Y ON (x.key = Y.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_table_1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME hbase_table_1)))))) x) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) key)) count)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key)))) Y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL Y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME hbase_table_3))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL Y) count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -531,56 +459,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - y:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(key) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(key) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -588,45 +499,27 @@ STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - $INTNAME - TableScan - Reduce Output Operator - key expressions: - expr: UDFToDouble(_col0) - type: double - sort order: + - Map-reduce partition columns: - expr: UDFToDouble(_col0) - type: double - tag: 1 - value expressions: - expr: _col1 - type: bigint - x:hbase_table_1 + Map Operator Tree: TableScan alias: hbase_table_1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(_col0) - type: double + key expressions: UDFToDouble(_col0) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(_col0) - type: double - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -634,20 +527,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: UDFToInteger(_col3) - type: int + expressions: _col0 (type: int), _col1 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat diff --git hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out index dec57b1..e4d2f0b 100644 --- hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out +++ hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out @@ -28,9 +28,6 @@ select key,"" where a.key > 0 AND a.key < 50 insert overwrite table src_x2 select value,"" where a.key > 50 AND a.key < 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_x1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR "")) (TOK_WHERE (AND (> (. (TOK_TABLE_OR_COL a) key) 0) (< (. (TOK_TABLE_OR_COL a) key) 50)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_x2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR "")) (TOK_WHERE (AND (> (. (TOK_TABLE_OR_COL a) key) 50) (< (. (TOK_TABLE_OR_COL a) key) 100))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -44,43 +41,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 0) and (key < 50)) - type: boolean + predicate: ((key > 0) and (key < 50)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: '' - type: string + expressions: key (type: string), '' (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 Filter Operator - predicate: - expr: ((key > 50) and (key < 100)) - type: boolean + predicate: ((key > 50) and (key < 100)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: '' - type: string + expressions: value (type: string), '' (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat @@ -111,12 +100,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -125,12 +112,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git hbase-handler/src/test/results/positive/ppd_key_ranges.q.out hbase-handler/src/test/results/positive/ppd_key_ranges.q.out index bf8fff9..2186fd8 100644 --- hbase-handler/src/test/results/positive/ppd_key_ranges.q.out +++ hbase-handler/src/test/results/positive/ppd_key_ranges.q.out @@ -23,9 +23,6 @@ PREHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key < PREHOOK: type: QUERY POSTHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key < 21 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_ppd_keyrange))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 8) (< (TOK_TABLE_OR_COL key) 21))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -33,27 +30,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_ppd_keyrange + Map Operator Tree: TableScan alias: hbase_ppd_keyrange filterExpr: - expr: ((key > 8) and (key < 21)) - type: boolean + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: ((key > 8) and (key < 21)) - type: boolean + predicate: ((key > 8) and (key < 21)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -84,9 +75,6 @@ PREHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key < PREHOOK: type: QUERY POSTHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key <= 17 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_ppd_keyrange))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 8) (<= (TOK_TABLE_OR_COL key) 17))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -94,27 +82,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_ppd_keyrange + Map Operator Tree: TableScan alias: hbase_ppd_keyrange filterExpr: - expr: ((key > 8) and (key <= 17)) - type: boolean + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: ((key > 8) and (key <= 17)) - type: boolean + predicate: ((key > 8) and (key <= 17)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -142,9 +124,6 @@ PREHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key < PREHOOK: type: QUERY POSTHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key <= 17 and value like '%11%' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_ppd_keyrange))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (> (TOK_TABLE_OR_COL key) 8) (<= (TOK_TABLE_OR_COL key) 17)) (like (TOK_TABLE_OR_COL value) '%11%'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -152,27 +131,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_ppd_keyrange + Map Operator Tree: TableScan alias: hbase_ppd_keyrange filterExpr: - expr: ((key > 8) and (key <= 17)) - type: boolean + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (value like '%11%') - type: boolean + predicate: (value like '%11%') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -195,9 +168,6 @@ PREHOOK: query: explain select * from hbase_ppd_keyrange where key >= 9 and key PREHOOK: type: QUERY POSTHOOK: query: explain select * from hbase_ppd_keyrange where key >= 9 and key < 17 and key = 11 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_ppd_keyrange))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (>= (TOK_TABLE_OR_COL key) 9) (< (TOK_TABLE_OR_COL key) 17)) (= (TOK_TABLE_OR_COL key) 11))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -205,24 +175,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - hbase_ppd_keyrange + Map Operator Tree: TableScan alias: hbase_ppd_keyrange + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (((key >= 9) and (key < 17)) and (key = 11)) - type: boolean + predicate: (((key >= 9) and (key < 17)) and (key = 11)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index d301be7..fe929fc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -32,6 +32,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.LinkedList; import java.util.Map; import java.util.Map.Entry; import java.util.Set; @@ -45,6 +46,7 @@ import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger; import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.ExplainWork; +import org.apache.hadoop.hive.ql.plan.TezWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.io.IOUtils; @@ -121,8 +123,20 @@ public JSONObject getJSONLogicalPlan(PrintStream out, ExplainWork work) throws E out = null; } + // Print out the parse AST + if (work.getAstStringTree() != null) { + String jsonAST = outputAST(work.getAstStringTree(), out, jsonOutput, 0); + if (out != null) { + out.println(); + } + + if (jsonOutput) { + outJSONObject.put("ABSTRACT SYNTAX TREE", jsonAST); + } + } + if (work.getParseContext() != null) { - out.print("LOGICAL PLAN"); + out.print("LOGICAL PLAN:"); JSONObject jsonPlan = outputMap(work.getParseContext().getTopOps(), true, out, jsonOutput, work.getExtended(), 0); if (out != null) { @@ -149,7 +163,7 @@ public JSONObject getJSONPlan(PrintStream out, ExplainWork work) } // Print out the parse AST - if (work.getAstStringTree() != null) { + if (work.getAstStringTree() != null && work.getExtended()) { String jsonAST = outputAST(work.getAstStringTree(), out, jsonOutput, 0); if (out != null) { out.println(); @@ -263,7 +277,45 @@ private JSONObject outputMap(Map mp, boolean hasHeader, PrintStream out, json.put(ent.getKey().toString(), ent.getValue().toString()); } } - else if (ent.getValue() instanceof List || ent.getValue() instanceof Map) { + else if (ent.getValue() instanceof List) { + if (ent.getValue() != null && !((List)ent.getValue()).isEmpty() + && ((List)ent.getValue()).get(0) != null && + ((List)ent.getValue()).get(0) instanceof TezWork.Dependency) { + if (out != null) { + boolean isFirst = true; + for (TezWork.Dependency dep: (List)ent.getValue()) { + if (!isFirst) { + out.print(", "); + } else { + out.print("<- "); + isFirst = false; + } + out.print(dep.getName()); + out.print(" ("); + out.print(dep.getType()); + out.print(")"); + } + out.println(); + } + if (jsonOutput) { + for (TezWork.Dependency dep: (List)ent.getValue()) { + JSONObject jsonDep = new JSONObject(); + jsonDep.put("parent", dep.getName()); + jsonDep.put("type", dep.getType()); + json.accumulate(ent.getKey().toString(), jsonDep); + } + } + } else { + if (out != null) { + out.print(ent.getValue().toString()); + out.println(); + } + if (jsonOutput) { + json.put(ent.getKey().toString(), ent.getValue().toString()); + } + } + } + else if (ent.getValue() instanceof Map) { if (out != null) { out.print(ent.getValue().toString()); out.println(); @@ -385,7 +437,7 @@ private JSONObject outputPlan(Serializable work, PrintStream out, JSONObject jsonOut = outputPlan(operator.getConf(), out, extended, jsonOutput, jsonOutput ? 0 : indent, appender); if (jsonOutput) { - json.put(operator.getOperatorId(), jsonOut); + json = jsonOut; } } @@ -396,19 +448,13 @@ private JSONObject outputPlan(Serializable work, PrintStream out, for (Operator op : operator.getChildOperators()) { JSONObject jsonOut = outputPlan(op, out, extended, jsonOutput, cindent); if (jsonOutput) { - json.put(operator.getOperatorId(), jsonOut); + ((JSONObject)json.get(JSONObject.getNames(json)[0])).accumulate("children", jsonOut); } } } } if (jsonOutput) { - if (keyJSONObject != null) { - JSONObject ret = new JSONObject(); - ret.put(keyJSONObject, json); - return ret; - } - return json; } return null; @@ -461,7 +507,7 @@ private JSONObject outputPlan(Serializable work, PrintStream out, } out.println(val); } - if (jsonOutput) { + if (jsonOutput && shouldPrint(xpl_note, val)) { json.put(header, val.toString()); } continue; @@ -486,7 +532,7 @@ private JSONObject outputPlan(Serializable work, PrintStream out, } JSONObject jsonOut = outputMap(mp, !skipHeader && !emptyHeader, out, extended, jsonOutput, ind); - if (jsonOutput) { + if (jsonOutput && !mp.isEmpty()) { json.put(header, jsonOut); } continue; @@ -497,7 +543,15 @@ private JSONObject outputPlan(Serializable work, PrintStream out, // Try this as a list try { - List l = (List) val; + List l; + + try { + l = (List) val; + } catch (ClassCastException e) { + Set s = (Set) val; + l = new LinkedList(); + l.addAll(s); + } if (out != null && !skipHeader && l != null && !l.isEmpty()) { out.print(header); @@ -505,7 +559,7 @@ private JSONObject outputPlan(Serializable work, PrintStream out, JSONArray jsonOut = outputList(l, out, !skipHeader && !emptyHeader, extended, jsonOutput, ind); - if (jsonOutput) { + if (jsonOutput && !l.isEmpty()) { json.put(header, jsonOut); } @@ -524,7 +578,13 @@ private JSONObject outputPlan(Serializable work, PrintStream out, } JSONObject jsonOut = outputPlan(s, out, extended, jsonOutput, ind); if (jsonOutput) { - json.put(header, jsonOut); + if (!skipHeader) { + json.put(header, jsonOut); + } else { + for(String k: JSONObject.getNames(jsonOut)) { + json.put(k, jsonOut.get(k)); + } + } } continue; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java index c6f431c..22a21c9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java @@ -266,8 +266,7 @@ int close(TezWork work, int rc) { try { List ws = work.getAllWork(); for (BaseWork w: ws) { - List> ops = w.getAllOperators(); - for (Operator op: ops) { + for (Operator op: w.getAllOperators()) { op.jobClose(conf, rc == 0); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java index 5834af7..c8dbe97 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java @@ -102,22 +102,28 @@ public void setOrigin(ASTNodeOrigin origin) { } public String dump() { - StringBuilder sb = new StringBuilder(); + StringBuilder sb = new StringBuilder("\n"); + dump(sb, ""); + return sb.toString(); + } - sb.append('('); + private StringBuilder dump(StringBuilder sb, String ws) { + sb.append(ws); sb.append(toString()); + sb.append("\n"); + ArrayList children = getChildren(); if (children != null) { for (Node node : getChildren()) { if (node instanceof ASTNode) { - sb.append(((ASTNode) node).dump()); + ((ASTNode) node).dump(sb, ws + " "); } else { - sb.append("NON-ASTNODE!!"); + sb.append(ws); + sb.append(" NON-ASTNODE!!"); + sb.append("\n"); } } } - sb.append(')'); - return sb.toString(); + return sb; } - } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java index 359f7ed..77fb8bd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java @@ -80,7 +80,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { pCtx, tasks, fetchTask, - input.toStringTree(), + input.dump(), sem.getInputs(), extended, formatted, diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java index 24694ef..0c10c30 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java @@ -20,11 +20,11 @@ public class AbstractOperatorDesc implements OperatorDesc { - private boolean vectorMode = false; + protected boolean vectorMode = false; protected transient Statistics statistics; @Override - @Explain(displayName = "Statistics", normalExplain = false) + @Explain(skipHeader = true, displayName = "Statistics") public Statistics getStatistics() { return statistics; } @@ -39,11 +39,6 @@ public Object clone() throws CloneNotSupportedException { throw new CloneNotSupportedException("clone not supported"); } - @Explain(displayName = "Vectorized execution", displayOnlyOnTrue = true) - public boolean getVectorModeOn() { - return vectorMode; - } - public void setVectorMode(boolean vm) { this.vectorMode = vm; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AggregationDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AggregationDesc.java index b35b313..17eeae1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AggregationDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AggregationDesc.java @@ -139,7 +139,6 @@ public void setMode(final GenericUDAFEvaluator.Mode mode) { return mode; } - @Explain(displayName = "expr") public String getExprString() { StringBuilder sb = new StringBuilder(); sb.append(genericUDAFName); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AlterDatabaseDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AlterDatabaseDesc.java index 7e081d2..16bb95b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AlterDatabaseDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AlterDatabaseDesc.java @@ -58,7 +58,7 @@ public AlterDatabaseDesc(String databaseName, boolean ifNotExists) { - @Explain(displayName="if not exists") + @Explain(displayName="if not exists", displayOnlyOnTrue = true) public boolean getIfNotExists() { return ifNotExists; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java index e8c3145..eb85446 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java @@ -20,7 +20,10 @@ import java.util.ArrayList; import java.util.LinkedList; +import java.util.LinkedHashSet; import java.util.List; +import java.util.Set; +import java.util.Stack; import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.Operator; @@ -79,22 +82,25 @@ public void addDummyOp(HashTableDummyOperator dummyOp) { dummyOps.add(dummyOp); } - protected abstract List> getAllRootOperators(); + protected abstract Set> getAllRootOperators(); - public List> getAllOperators() { + public Set> getAllOperators() { - List> returnList = new ArrayList>(); - List> opList = getAllRootOperators(); + Set> returnSet = new LinkedHashSet>(); + Set> opSet = getAllRootOperators(); + Stack> opStack = new Stack>(); - //recursively add all children - while (!opList.isEmpty()) { - Operator op = opList.remove(0); + // add all children + opStack.addAll(opSet); + + while(!opStack.empty()) { + Operator op = opStack.pop(); + returnSet.add(op); if (op.getChildOperators() != null) { - opList.addAll(op.getChildOperators()); + opStack.addAll(op.getChildOperators()); } - returnList.add(op); } - return returnList; + return returnSet; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/CreateDatabaseDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/CreateDatabaseDesc.java index 1148a2d..a6b52aa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/CreateDatabaseDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/CreateDatabaseDesc.java @@ -58,7 +58,7 @@ public CreateDatabaseDesc(String databaseName, boolean ifNotExists) { - @Explain(displayName="if not exists") + @Explain(displayName="if not exists", displayOnlyOnTrue = true) public boolean getIfNotExists() { return ifNotExists; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java index d1b729c..98c511e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java @@ -148,7 +148,7 @@ public CreateTableDesc(String tableName, boolean isExternal, return Utilities.getFieldSchemaString(getPartCols()); } - @Explain(displayName = "if not exists") + @Explain(displayName = "if not exists", displayOnlyOnTrue = true) public boolean getIfNotExists() { return ifNotExists; } @@ -196,6 +196,14 @@ public void setBucketCols(ArrayList bucketCols) { } @Explain(displayName = "# buckets") + public Integer getNumBucketsExplain() { + if (numBuckets == -1) { + return null; + } else { + return numBuckets; + } + } + public int getNumBuckets() { return numBuckets; } @@ -294,7 +302,7 @@ public void setLocation(String location) { this.location = location; } - @Explain(displayName = "isExternal") + @Explain(displayName = "isExternal", displayOnlyOnTrue = true) public boolean isExternal() { return isExternal; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableLikeDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableLikeDesc.java index 6705059..cb5d64c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableLikeDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableLikeDesc.java @@ -58,7 +58,7 @@ public CreateTableLikeDesc(String tableName, boolean isExternal, this.likeTableName = likeTableName; } - @Explain(displayName = "if not exists") + @Explain(displayName = "if not exists", displayOnlyOnTrue = true) public boolean getIfNotExists() { return ifNotExists; } @@ -103,7 +103,7 @@ public void setLocation(String location) { this.location = location; } - @Explain(displayName = "isExternal") + @Explain(displayName = "isExternal", displayOnlyOnTrue = true) public boolean isExternal() { return isExternal; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java index 30549e7..dd76a82 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java @@ -144,7 +144,7 @@ public void setTblProps(Map tblProps) { return tblProps; } - @Explain(displayName = "if not exists") + @Explain(displayName = "if not exists", displayOnlyOnTrue = true) public boolean getIfNotExists() { return ifNotExists; } @@ -162,7 +162,7 @@ public void setOrReplace(boolean orReplace) { this.orReplace = orReplace; } - @Explain(displayName = "is alter view as select") + @Explain(displayName = "is alter view as select", displayOnlyOnTrue = true) public boolean getIsAlterViewAs() { return isAlterViewAs; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnDesc.java index ef1c47f..23c7123 100755 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnDesc.java @@ -110,7 +110,6 @@ public String toString() { return "Column[" + column + "]"; } - @Explain(displayName = "expr") @Override public String getExprString() { return getColumn(); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java index d25b2e8..1ba6c2e 100755 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java @@ -75,7 +75,6 @@ public String toString() { return "Const " + typeInfo.toString() + " " + value; } - @Explain(displayName = "expr") @Override public String getExprString() { if (value == null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java index 909938e..82d86ee 100755 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java @@ -76,7 +76,6 @@ public ObjectInspector getWritableObjectInspector() { .getStandardWritableObjectInspectorFromTypeInfo(typeInfo); } - @Explain(displayName = "type") public String getTypeString() { return typeInfo.getTypeName(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeFieldDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeFieldDesc.java index 8a6bd08..2e375bb 100755 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeFieldDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeFieldDesc.java @@ -87,7 +87,6 @@ public String toString() { return desc.toString() + "." + fieldName; } - @Explain(displayName = "expr") @Override public String getExprString() { return desc.getExprString() + "." + fieldName; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java index 77ae7f0..76ddb1e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java @@ -145,7 +145,6 @@ public String toString() { return sb.toString(); } - @Explain(displayName = "expr") @Override public String getExprString() { // Get the children expr strings diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeNullDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeNullDesc.java index ec7cd41..e0d338e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeNullDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeNullDesc.java @@ -40,7 +40,6 @@ public Object getValue() { return null; } - @Explain(displayName = "expr") @Override public String getExprString() { return "null"; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java index 747ac85..51ebea4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java @@ -162,7 +162,7 @@ public void setCompressed(boolean compressed) { this.compressed = compressed; } - @Explain(displayName = "GlobalTableId") + @Explain(displayName = "GlobalTableId", normalExplain = false) public int getDestTableId() { return destTableId; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java index 4f221a7..5856743 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java @@ -97,6 +97,12 @@ public FilterDesc( } @Explain(displayName = "predicate") + public String getPredicateString() { + StringBuffer sb = new StringBuffer(); + PlanUtils.addExprToStringBuffer(predicate, sb); + return sb.toString(); + } + public org.apache.hadoop.hive.ql.plan.ExprNodeDesc getPredicate() { return predicate; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java index 19ced53..5aa3e82 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java @@ -146,6 +146,10 @@ public void setMode(final Mode mode) { } @Explain(displayName = "keys") + public String getKeyString() { + return PlanUtils.getExprListString(keys); + } + public ArrayList getKeys() { return keys; } @@ -181,6 +185,14 @@ public void setMemoryThreshold(float memoryThreshold) { } @Explain(displayName = "aggregations") + public List getAggregatorStrings() { + List res = new ArrayList(); + for (AggregationDesc agg: aggregators) { + res.add(agg.getExprString()); + } + return res; + } + public ArrayList getAggregators() { return aggregators; } @@ -198,7 +210,7 @@ public void setGroupKeyNotReductionKey(final boolean groupKeyNotReductionKey) { this.groupKeyNotReductionKey = groupKeyNotReductionKey; } - @Explain(displayName = "bucketGroup") + @Explain(displayName = "bucketGroup", displayOnlyOnTrue = true) public boolean getBucketGroup() { return bucketGroup; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java index f00192d..3426991 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -316,9 +317,20 @@ public void setRetainList(Map> retainList) { } /** - * @return the keys + * @return the keys in string form */ @Explain(displayName = "keys") + public Map getKeysString() { + Map keyMap = new LinkedHashMap(); + for (Map.Entry> k: getKeys().entrySet()) { + keyMap.put(k.getKey(), PlanUtils.getExprListString(k.getValue())); + } + return keyMap; + } + + /** + * @return the keys + */ public Map> getKeys() { return keys; } @@ -334,7 +346,7 @@ public void setKeys(Map> keys) { /** * @return the position of the big table not in memory */ - @Explain(displayName = "Position of Big Table") + @Explain(displayName = "Position of Big Table", normalExplain = false) public int getPosBigTable() { return posBigTable; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java index 2168811..0e2c6ee 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java @@ -349,7 +349,7 @@ public void setTagOrder(Byte[] tagOrder) { this.tagOrder = tagOrder; } - @Explain(displayName = "handleSkewJoin") + @Explain(displayName = "handleSkewJoin", displayOnlyOnTrue = true) public boolean getHandleSkewJoin() { return handleSkewJoin; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java index cdd3472..be6d194 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java @@ -36,6 +36,7 @@ public LimitDesc(final int limit) { this.limit = limit; } + @Explain(displayName = "Number of rows") public int getLimit() { return limit; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index cf0ca57..526ae75 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -142,9 +142,20 @@ public void setDumpFilePrefix(String dumpFilePrefix) { } /** - * @return the keys + * @return the keys in string form */ @Explain(displayName = "keys") + public Map getKeysString() { + Map keyMap = new LinkedHashMap(); + for (Map.Entry> k: getKeys().entrySet()) { + keyMap.put(k.getKey(), PlanUtils.getExprListString(k.getValue())); + } + return keyMap; + } + + /** + * @return the keys + */ public Map> getKeys() { return keys; } @@ -160,7 +171,7 @@ public void setKeys(Map> keys) { /** * @return the position of the big table not in memory */ - @Explain(displayName = "Position of Big Table") + @Explain(displayName = "Position of Big Table", normalExplain = false) public int getPosBigTable() { return posBigTable; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index d8a15a0..19b553f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -24,7 +24,9 @@ import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; +import java.util.Set; import java.util.Map; import java.util.Map.Entry; @@ -199,7 +201,6 @@ public void setAliasToPartnInfo( this.aliasToPartnInfo = aliasToPartnInfo; } - @Explain(displayName = "Alias -> Map Operator Tree") public LinkedHashMap> getAliasToWork() { return aliasToWork; } @@ -226,7 +227,7 @@ public void setMapLocalWork(final MapredLocalWork mapLocalWork) { } - @Explain(displayName = "Split Sample") + @Explain(displayName = "Split Sample", normalExplain = false) public HashMap getNameToSplitSample() { return nameToSplitSample; } @@ -298,9 +299,15 @@ private void setAliases() { } } + @Explain(displayName = "Execution mode") + public String getVectorModeOn() { + return vectorMode ? "vectorized" : null; + } + @Override - protected List> getAllRootOperators() { - ArrayList> opList = new ArrayList>(); + @Explain(displayName = "Map Operator Tree") + public Set> getAllRootOperators() { + Set> opSet = new LinkedHashSet>(); Map> pa = getPathToAliases(); if (pa != null) { @@ -308,12 +315,12 @@ private void setAliases() { for (String a : ls) { Operator op = getAliasToWork().get(a); if (op != null ) { - opList.add(op); + opSet.add(op); } } } } - return opList; + return opSet; } public void mergeAliasedInput(String alias, String pathDir, PartitionDesc partitionInfo) { @@ -471,7 +478,7 @@ public void setSamplingType(int samplingType) { this.samplingType = samplingType; } - @Explain(displayName = "Sampling") + @Explain(displayName = "Sampling", normalExplain = false) public String getSamplingTypeString() { return samplingType == 1 ? "SAMPLING_ON_PREV_MR" : samplingType == 2 ? "SAMPLING_ON_START" : null; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java index 228f938..817bb3a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.LinkedHashMap; @@ -915,4 +916,26 @@ public static ReadEntity addInput(Set inputs, ReadEntity newInput) { // make compile happy return null; } + + public static String getExprListString(Collection exprs) { + StringBuffer sb = new StringBuffer(); + boolean first = true; + for (ExprNodeDesc expr: exprs) { + if (!first) { + sb.append(", "); + } else { + first = false; + } + addExprToStringBuffer(expr, sb); + } + + return sb.length() == 0 ? null : sb.toString(); + } + + public static void addExprToStringBuffer(ExprNodeDesc expr, StringBuffer sb) { + sb.append(expr.getExprString()); + sb.append(" (type: "); + sb.append(expr.getTypeString()); + sb.append(")"); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PrincipalDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/PrincipalDesc.java index 42e5210..7dc0ded 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PrincipalDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PrincipalDesc.java @@ -50,7 +50,7 @@ public void setName(String name) { this.name = name; } - @Explain(displayName="type") + @Explain(displayName="type", normalExplain = false) public PrincipalType getType() { return type; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java index e193734..f88a120 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java @@ -144,6 +144,10 @@ public void setOutputValueColumnNames( } @Explain(displayName = "key expressions") + public String getKeyColString() { + return PlanUtils.getExprListString(keyCols); + } + public java.util.ArrayList getKeyCols() { return keyCols; } @@ -161,6 +165,10 @@ public void setNumDistributionKeys(int numKeys) { } @Explain(displayName = "value expressions") + public String getValueColsString() { + return PlanUtils.getExprListString(valueCols); + } + public java.util.ArrayList getValueCols() { return valueCols; } @@ -170,6 +178,10 @@ public void setValueCols(final java.util.ArrayList valueCols) { } @Explain(displayName = "Map-reduce partition columns") + public String getParitionColsString() { + return PlanUtils.getExprListString(partitionCols); + } + public java.util.ArrayList getPartitionCols() { return partitionCols; } @@ -179,7 +191,7 @@ public void setPartitionCols( this.partitionCols = partitionCols; } - @Explain(displayName = "tag") + @Explain(displayName = "tag", normalExplain = false) public int getTag() { return tag; } @@ -196,7 +208,7 @@ public void setTopN(int topN) { this.topN = topN; } - @Explain(displayName = "TopN") + @Explain(displayName = "TopN", normalExplain = false) public Integer getTopNExplain() { return topN > 0 ? topN : null; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java index 03edcc1..afb3648 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java @@ -21,6 +21,8 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; +import java.util.Set; +import java.util.LinkedHashSet; import java.util.Map; import org.apache.commons.logging.Log; @@ -95,6 +97,11 @@ public void setTagToValueDesc(final List tagToValueDesc) { this.tagToValueDesc = tagToValueDesc; } + @Explain(displayName = "Execution mode") + public String getVectorModeOn() { + return vectorMode ? "vectorized" : null; + } + @Explain(displayName = "Reduce Operator Tree") public Operator getReducer() { return reducer; @@ -122,10 +129,10 @@ public void setTagToInput(final Map tagToInput) { } @Override - protected List> getAllRootOperators() { - ArrayList> opList = new ArrayList>(); - opList.add(getReducer()); - return opList; + protected Set> getAllRootOperators() { + Set> opSet = new LinkedHashSet>(); + opSet.add(getReducer()); + return opSet; } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java index e76b8c3..fa6b548 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java @@ -76,6 +76,10 @@ public Object clone() { } @Explain(displayName = "expressions") + public String getColListString() { + return PlanUtils.getExprListString(colList); + } + public List getColList() { return colList; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java index 02a8bdc..4173ea4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java @@ -101,16 +101,16 @@ public void setColumnStatsState(State columnStatsState) { } @Override - @Explain(displayName = "") + @Explain(displayName = "Statistics") public String toString() { StringBuilder sb = new StringBuilder(); - sb.append(" numRows: "); + sb.append("Num rows: "); sb.append(numRows); - sb.append(" dataSize: "); + sb.append(" Data size: "); sb.append(dataSize); - sb.append(" basicStatsState: "); + sb.append(" Basic stats: "); sb.append(basicStatsState); - sb.append(" colStatsState: "); + sb.append(" Column stats: "); sb.append(columnStatsState); return sb.toString(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java index f48a800..0cf8465 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java @@ -51,7 +51,6 @@ public TableDesc() { * @param outputFormatClass * @param properties must contain serde class name associate with this table. */ - public TableDesc( final Class inputFormatClass, final Class outputFormatClass, final Properties properties) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java index dad5497..9112a77 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java @@ -18,10 +18,12 @@ package org.apache.hadoop.hive.ql.plan; +import java.io.Serializable; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -56,9 +58,20 @@ new HashMap, EdgeType>(); /** + * getWorkMap returns a map of "vertex name" to BaseWork + */ + @Explain(displayName = "Vertices") + public Map getWorkMap() { + Map result = new LinkedHashMap(); + for (BaseWork w: getAllWork()) { + result.put(w.getName(), w); + } + return result; + } + + /** * getAllWork returns a topologically sorted list of BaseWork */ - @Explain(skipHeader = true, displayName = "Tez Work") public List getAllWork() { List result = new LinkedList(); @@ -217,7 +230,46 @@ public void remove(BaseWork work) { invertedWorkGraph.remove(work); } + /** + * returns the edge type connecting work a and b + */ public EdgeType getEdgeProperty(BaseWork a, BaseWork b) { return edgeProperties.get(new ImmutablePair(a,b)); } + + /* + * Dependency is a class used for explain + */ + public class Dependency implements Serializable { + public BaseWork w; + public EdgeType type; + + @Explain(displayName = "Name") + public String getName() { + return w.getName(); + } + + @Explain(displayName = "Type") + public String getType() { + return type.toString(); + } + } + + @Explain(displayName = "Edges") + public Map> getDependencyMap() { + Map> result = new LinkedHashMap>(); + for (Map.Entry> entry: invertedWorkGraph.entrySet()) { + List dependencies = new LinkedList(); + for (BaseWork d: entry.getValue()) { + Dependency dependency = new Dependency(); + dependency.w = d; + dependency.type = getEdgeProperty(d, entry.getKey()); + dependencies.add(dependency); + } + if (!dependencies.isEmpty()) { + result.put(entry.getKey().getName(), dependencies); + } + } + return result; + } } diff --git ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out index 25d7b57..da6dd08 100644 --- ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out +++ ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out @@ -78,9 +78,6 @@ select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and a.ds="2008-04-08" and b.ds="2008-04-08" POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) ds) "2008-04-08")) (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -88,39 +85,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - expr: value - type: string - b + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - value expressions: - expr: value - type: string + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -128,20 +111,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col6 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -161,9 +139,6 @@ select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and a.ds="2008-04-08" and b.ds="2008-04-08" POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) ds) "2008-04-08")) (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -171,34 +146,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 0 + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col6 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -213,6 +182,7 @@ STAGE PLANS: b TableScan alias: b + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientnegative/index_bitmap_no_map_aggr.q.out ql/src/test/results/clientnegative/index_bitmap_no_map_aggr.q.out index 56ab78c..31cada1 100644 --- ql/src/test/results/clientnegative/index_bitmap_no_map_aggr.q.out +++ ql/src/test/results/clientnegative/index_bitmap_no_map_aggr.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATEINDEX POSTHOOK: query: EXPLAIN CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX -ABSTRACT SYNTAX TREE: - (TOK_CREATEINDEX src1_index 'BITMAP' (TOK_TABNAME src) (TOK_TABCOLNAME key) TOK_DEFERRED_REBUILDINDEX) - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientnegative/script_error.q.out ql/src/test/results/clientnegative/script_error.q.out index 64bf5b5..663ebed 100644 --- ql/src/test/results/clientnegative/script_error.q.out +++ ql/src/test/results/clientnegative/script_error.q.out @@ -6,9 +6,6 @@ POSTHOOK: query: EXPLAIN SELECT TRANSFORM(src.key, src.value) USING '../../data/scripts/error_script' AS (tkey, tvalue) FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value)) TOK_SERDE TOK_RECORDWRITER '../../data/scripts/error_script' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST tkey tvalue)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -16,26 +13,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: ../../data/scripts/error_script output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out index 0f8deba..c6877d5 100644 --- ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out +++ ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out @@ -58,9 +58,6 @@ POSTHOOK: Lineage: table_asc.key EXPRESSION [(src)src.FieldSchema(name:key, type POSTHOOK: Lineage: table_asc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: table_desc.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: table_desc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table_asc) a) (TOK_TABREF (TOK_TABNAME table_desc) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -68,36 +65,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 + Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -112,6 +101,7 @@ STAGE PLANS: a TableScan alias: a + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientnegative/udf_assert_true.q.out ql/src/test/results/clientnegative/udf_assert_true.q.out index 0c23b6d..4d05eb0 100644 --- ql/src/test/results/clientnegative/udf_assert_true.q.out +++ ql/src/test/results/clientnegative/udf_assert_true.q.out @@ -7,9 +7,6 @@ PREHOOK: query: EXPLAIN SELECT ASSERT_TRUE(x > 0) FROM src LATERAL VIEW EXPLODE( PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT ASSERT_TRUE(x > 0) FROM src LATERAL VIEW EXPLODE(ARRAY(1, 2)) a AS x LIMIT 2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION EXPLODE (TOK_FUNCTION ARRAY 1 2)) x (TOK_TABALIAS a))) (TOK_TABREF (TOK_TABNAME src)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION ASSERT_TRUE (> (TOK_TABLE_OR_COL x) 0)))) (TOK_LIMIT 2))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -17,45 +14,51 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Forward + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: assert_true((_col4 > 0)) - type: void + expressions: assert_true((_col4 > 0)) (type: void) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array(1,2) - type: array + expressions: array(1,2) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: assert_true((_col4 > 0)) - type: void + expressions: assert_true((_col4 > 0)) (type: void) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -79,9 +82,6 @@ PREHOOK: query: EXPLAIN SELECT ASSERT_TRUE(x < 2) FROM src LATERAL VIEW EXPLODE( PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT ASSERT_TRUE(x < 2) FROM src LATERAL VIEW EXPLODE(ARRAY(1, 2)) a AS x LIMIT 2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION EXPLODE (TOK_FUNCTION ARRAY 1 2)) x (TOK_TABALIAS a))) (TOK_TABREF (TOK_TABNAME src)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION ASSERT_TRUE (< (TOK_TABLE_OR_COL x) 2)))) (TOK_LIMIT 2))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -89,45 +89,51 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Forward + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: assert_true((_col4 < 2)) - type: void + expressions: assert_true((_col4 < 2)) (type: void) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array(1,2) - type: array + expressions: array(1,2) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: assert_true((_col4 < 2)) - type: void + expressions: assert_true((_col4 < 2)) (type: void) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientnegative/udf_assert_true2.q.out ql/src/test/results/clientnegative/udf_assert_true2.q.out index 3083853..ff1e6cd 100644 --- ql/src/test/results/clientnegative/udf_assert_true2.q.out +++ ql/src/test/results/clientnegative/udf_assert_true2.q.out @@ -2,9 +2,6 @@ PREHOOK: query: EXPLAIN SELECT 1 + ASSERT_TRUE(x < 2) FROM src LATERAL VIEW EXPL PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT 1 + ASSERT_TRUE(x < 2) FROM src LATERAL VIEW EXPLODE(ARRAY(1, 2)) a AS x LIMIT 2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION EXPLODE (TOK_FUNCTION ARRAY 1 2)) x (TOK_TABALIAS a))) (TOK_TABREF (TOK_TABNAME src)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ 1 (TOK_FUNCTION ASSERT_TRUE (< (TOK_TABLE_OR_COL x) 2))))) (TOK_LIMIT 2))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -12,45 +9,51 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Forward + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: (1 + assert_true((_col4 < 2))) - type: double + expressions: (1 + assert_true((_col4 < 2))) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array(1,2) - type: array + expressions: array(1,2) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: (1 + assert_true((_col4 < 2))) - type: double + expressions: (1 + assert_true((_col4 < 2))) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/add_part_multiple.q.out ql/src/test/results/clientpositive/add_part_multiple.q.out index 0c288f5..b8de0d4 100644 --- ql/src/test/results/clientpositive/add_part_multiple.q.out +++ ql/src/test/results/clientpositive/add_part_multiple.q.out @@ -21,9 +21,6 @@ PARTITION (ds='2010-02-01') location 'B' PARTITION (ds='2010-03-01') PARTITION (ds='2010-04-01') location 'C' POSTHOOK: type: ALTERTABLE_ADDPARTS -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/alias_casted_column.q.out ql/src/test/results/clientpositive/alias_casted_column.q.out index 0c4cc5d..1259609 100644 --- ql/src/test/results/clientpositive/alias_casted_column.q.out +++ ql/src/test/results/clientpositive/alias_casted_column.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- HIVE-2477 Use name of original expression for name of CAST output explain select key from (select cast(key as int) from src )t POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL key)))))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,18 +11,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int + expressions: UDFToInteger(key) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -41,9 +37,6 @@ PREHOOK: type: QUERY POSTHOOK: query: --backward explain select key2 from (select cast(key as int) key2 from src )t POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL key)) key2)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key2))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -51,18 +44,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int + expressions: UDFToInteger(key) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/allcolref_in_udf.q.out ql/src/test/results/clientpositive/allcolref_in_udf.q.out index 3e78246..781ba9e 100644 --- ql/src/test/results/clientpositive/allcolref_in_udf.q.out +++ ql/src/test/results/clientpositive/allcolref_in_udf.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select concat(*),array(*) from src where key < 100 limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR concat)) (TOK_SELEXPR (TOK_FUNCTIONSTAR array))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 100)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,25 +11,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: concat(key, value) - type: string - expr: array(key,value) - type: array + expressions: concat(key, value) (type: string), array(key,value) (type: array) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -76,9 +71,6 @@ select stack(2, *) as (e1,e2,e3) from ( select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*) from src a join src b on a.key+1=b.key where a.key < 100) x limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (+ (. (TOK_TABLE_OR_COL a) key) 1) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR concat)) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_ALLCOLREF (TOK_TABNAME a)))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_ALLCOLREF (TOK_TABNAME b)))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_ALLCOLREF (TOK_TABNAME a)) (. (TOK_TABLE_OR_COL b) key))) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) key) (TOK_ALLCOLREF (TOK_TABNAME b))))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL a) key) 100)))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION stack 2 TOK_ALLCOLREF) e1 e2 e3)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -86,45 +78,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x:a + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: (key + 1) - type: double + key expressions: (key + 1) (type: double) sort order: + - Map-reduce partition columns: - expr: (key + 1) - type: double - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - x:b - TableScan - alias: b - Reduce Output Operator - key expressions: - expr: UDFToDouble(key) - type: double - sort order: + - Map-reduce partition columns: - expr: UDFToDouble(key) - type: double - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: (key + 1) (type: double) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -132,29 +107,21 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: 2 - type: int - expr: concat(_col0, _col1, _col4, _col5) - type: string - expr: concat(_col0, _col1) - type: string - expr: concat(_col4, _col5) - type: string - expr: concat(_col0, _col1, _col4) - type: string - expr: concat(_col0, _col4, _col5) - type: string + expressions: 2 (type: int), concat(_col0, _col1, _col4, _col5) (type: string), concat(_col0, _col1) (type: string), concat(_col4, _col5) (type: string), concat(_col0, _col1, _col4) (type: string), concat(_col0, _col4, _col5) (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE function name: stack Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2060 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2060 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -199,9 +166,6 @@ PREHOOK: query: explain select explode(*) as x from allcolref limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain select explode(*) as x from allcolref limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME allcolref))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR explode) x)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -209,21 +173,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - allcolref + Map Operator Tree: TableScan alias: allcolref + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _c0 - type: array + expressions: _c0 (type: array) outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE function name: explode Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/alter_partition_coltype.q.out ql/src/test/results/clientpositive/alter_partition_coltype.q.out index 04b9b2c..49c1051 100644 --- ql/src/test/results/clientpositive/alter_partition_coltype.q.out +++ ql/src/test/results/clientpositive/alter_partition_coltype.q.out @@ -99,7 +99,26 @@ POSTHOOK: Lineage: alter_coltype PARTITION(dt=10,ts=3.0).value SIMPLE [(src1)src POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alter_coltype))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (= (TOK_TABLE_OR_COL dt) '100x')))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alter_coltype + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + TOK_WHERE + = + TOK_TABLE_OR_COL + dt + '100x' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -108,32 +127,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alter_coltype + Map Operator Tree: TableScan alias: alter_coltype - Statistics: - numRows: 25 dataSize: 191 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - Statistics: - numRows: 25 dataSize: 191 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -184,27 +194,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -308,7 +311,26 @@ POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=3.0).value SIMPLE [(src1)s POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alter_coltype))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (= (TOK_TABLE_OR_COL ts) '6:30pm')))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alter_coltype + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + TOK_WHERE + = + TOK_TABLE_OR_COL + ts + '6:30pm' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -317,32 +339,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alter_coltype + Map Operator Tree: TableScan alias: alter_coltype - Statistics: - numRows: 25 dataSize: 191 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - Statistics: - numRows: 25 dataSize: 191 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -393,27 +406,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -470,7 +476,31 @@ POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=3.0).value SIMPLE [(src1)s POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alter_coltype))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ts) 3.0) (= (TOK_TABLE_OR_COL dt) 10))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alter_coltype + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + TOK_WHERE + and + = + TOK_TABLE_OR_COL + ts + 3.0 + = + TOK_TABLE_OR_COL + dt + 10 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -479,39 +509,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alter_coltype + Map Operator Tree: TableScan alias: alter_coltype - Statistics: - numRows: 75 dataSize: 573 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 75 Data size: 573 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((ts = 3.0) and (dt = 10)) - type: boolean - Statistics: - numRows: 75 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + predicate: ((ts = 3.0) and (dt = 10)) (type: boolean) + Statistics: Num rows: 75 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - Statistics: - numRows: 75 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 75 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -648,27 +666,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -797,7 +808,35 @@ POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=3.0).value SIMPLE [(src1)s POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alter_coltype))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL dt)) (TOK_SELEXPR (TOK_TABLE_OR_COL ts))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL dt))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alter_coltype + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + dt + TOK_SELEXPR + TOK_TABLE_OR_COL + ts + TOK_WHERE + TOK_FUNCTION + TOK_ISNOTNULL + TOK_TABLE_OR_COL + dt + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -806,33 +845,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alter_coltype + Map Operator Tree: TableScan alias: alter_coltype - Statistics: - numRows: 75 dataSize: 573 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 75 Data size: 573 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: dt - type: string - expr: ts - type: string + expressions: key (type: string), value (type: string), dt (type: string), ts (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 75 dataSize: 573 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 75 Data size: 573 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 75 dataSize: 573 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 75 Data size: 573 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1041,7 +1068,31 @@ POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=3.0).value SIMPLE [(src1)s POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alter_coltype))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (or (= (TOK_TABLE_OR_COL dt) '100x') (= (TOK_TABLE_OR_COL dt) '10'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alter_coltype + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + TOK_WHERE + or + = + TOK_TABLE_OR_COL + dt + '100x' + = + TOK_TABLE_OR_COL + dt + '10' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1050,32 +1101,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alter_coltype + Map Operator Tree: TableScan alias: alter_coltype - Statistics: - numRows: 75 dataSize: 573 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 75 Data size: 573 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - Statistics: - numRows: 75 dataSize: 573 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 75 Data size: 573 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1212,27 +1254,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/ambiguous_col.q.out ql/src/test/results/clientpositive/ambiguous_col.q.out index 7ebb0a7..589643e 100644 --- ql/src/test/results/clientpositive/ambiguous_col.q.out +++ ql/src/test/results/clientpositive/ambiguous_col.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- TOK_ALLCOLREF explain select * from (select a.key, a.* from (select * from src) a join (select * from src1) b on (a.key = b.key)) t POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,48 +11,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t:a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - t:b:src1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -63,18 +44,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -90,9 +68,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- DOT explain select * from (select a.key, a.`[k].*` from (select * from src) a join (select * from src1) b on (a.key = b.key)) t POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) `[k].*`))))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -100,44 +75,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t:a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - t:b:src1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -145,16 +108,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -170,9 +132,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- EXPRESSION explain select * from (select a.key, a.key from (select * from src) a join (select * from src1) b on (a.key = b.key)) t POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -180,44 +139,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t:a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - t:b:src1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -225,16 +172,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/annotate_stats_filter.q.out ql/src/test/results/clientpositive/annotate_stats_filter.q.out index 2eaf089..e6eae8a 100644 --- ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -55,7 +55,20 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -67,22 +80,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- column stats are not COMPLETE, so stats are not updated @@ -98,7 +101,25 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL state) 'OH')))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + TOK_TABLE_OR_COL + state + 'OH' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -107,40 +128,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (state = 'OH') - type: boolean - Statistics: - numRows: 4 dataSize: 398 basicStatsState: COMPLETE colStatsState: NONE + predicate: (state = 'OH') (type: boolean) + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 4 dataSize: 398 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 4 dataSize: 398 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -235,7 +241,25 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL state) 'OH')))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + TOK_TABLE_OR_COL + state + 'OH' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -244,40 +268,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (state = 'OH') - type: boolean - Statistics: - numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: (state = 'OH') (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -360,7 +369,25 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (!= (TOK_TABLE_OR_COL state) 'OH')))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + != + TOK_TABLE_OR_COL + state + 'OH' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -369,40 +396,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (state <> 'OH') - type: boolean - Statistics: - numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: (state <> 'OH') (type: boolean) + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -481,7 +493,25 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (<> (TOK_TABLE_OR_COL state) 'OH')))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + <> + TOK_TABLE_OR_COL + state + 'OH' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -490,40 +520,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (state <> 'OH') - type: boolean - Statistics: - numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: (state <> 'OH') (type: boolean) + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -606,7 +621,25 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL zip))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + TOK_FUNCTION + TOK_ISNULL + TOK_TABLE_OR_COL + zip + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -615,40 +648,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: zip is null - type: boolean - Statistics: - numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: zip is null (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -729,7 +747,26 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL zip)))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + ! + TOK_FUNCTION + TOK_ISNOTNULL + TOK_TABLE_OR_COL + zip + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -738,40 +775,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (not zip is not null) - type: boolean - Statistics: - numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: (not zip is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -854,7 +876,25 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL zip))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + TOK_FUNCTION + TOK_ISNOTNULL + TOK_TABLE_OR_COL + zip + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -863,40 +903,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: zip is not null - type: boolean - Statistics: - numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: zip is not null (type: boolean) + Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -977,7 +1002,26 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL zip)))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + ! + TOK_FUNCTION + TOK_ISNULL + TOK_TABLE_OR_COL + zip + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -986,40 +1030,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (not zip is null) - type: boolean - Statistics: - numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: (not zip is null) (type: boolean) + Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1102,7 +1131,23 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! false)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + ! + false + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1111,40 +1156,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (not false) - type: boolean - Statistics: - numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: (not false) (type: boolean) + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1225,7 +1255,23 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! true)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + ! + true + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1234,40 +1280,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (not true) - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + predicate: (not true) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1350,7 +1381,30 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (= (TOK_TABLE_OR_COL state) 'OH') (= (TOK_TABLE_OR_COL state) 'CA'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + or + = + TOK_TABLE_OR_COL + state + 'OH' + = + TOK_TABLE_OR_COL + state + 'CA' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1359,40 +1413,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((state = 'OH') or (state = 'CA')) - type: boolean - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: ((state = 'OH') or (state = 'CA')) (type: boolean) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1475,7 +1514,30 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL year) 2001) (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL year)))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + = + TOK_TABLE_OR_COL + year + 2001 + TOK_FUNCTION + TOK_ISNULL + TOK_TABLE_OR_COL + year + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1484,40 +1546,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((year = 2001) and year is null) - type: boolean - Statistics: - numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: ((year = 2001) and year is null) (type: boolean) + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1598,7 +1645,35 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL year) 2001) (= (TOK_TABLE_OR_COL state) 'OH')) (= (TOK_TABLE_OR_COL state) 'FL'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + and + = + TOK_TABLE_OR_COL + year + 2001 + = + TOK_TABLE_OR_COL + state + 'OH' + = + TOK_TABLE_OR_COL + state + 'FL' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1607,40 +1682,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (((year = 2001) and (state = 'OH')) and (state = 'FL')) - type: boolean - Statistics: - numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: (((year = 2001) and (state = 'OH')) and (state = 'FL')) (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1723,7 +1783,35 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (and (= (TOK_TABLE_OR_COL year) 2001) (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL year))) (= (TOK_TABLE_OR_COL state) 'CA'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + or + and + = + TOK_TABLE_OR_COL + year + 2001 + TOK_FUNCTION + TOK_ISNULL + TOK_TABLE_OR_COL + year + = + TOK_TABLE_OR_COL + state + 'CA' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1732,40 +1820,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (((year = 2001) and year is null) or (state = 'CA')) - type: boolean - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: (((year = 2001) and year is null) or (state = 'CA')) (type: boolean) + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1848,7 +1921,35 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (or (= (TOK_TABLE_OR_COL year) 2001) (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL year))) (= (TOK_TABLE_OR_COL state) 'CA'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + or + = + TOK_TABLE_OR_COL + year + 2001 + TOK_FUNCTION + TOK_ISNULL + TOK_TABLE_OR_COL + year + = + TOK_TABLE_OR_COL + state + 'CA' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1857,40 +1958,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (((year = 2001) or year is null) and (state = 'CA')) - type: boolean - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: (((year = 2001) or year is null) and (state = 'CA')) (type: boolean) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1973,7 +2059,25 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL locid) 30)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + < + TOK_TABLE_OR_COL + locid + 30 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1982,40 +2086,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (locid < 30) - type: boolean - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: (locid < 30) (type: boolean) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2094,7 +2183,25 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL locid) 30)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + > + TOK_TABLE_OR_COL + locid + 30 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2103,40 +2210,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (locid > 30) - type: boolean - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: (locid > 30) (type: boolean) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2215,7 +2307,25 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (<= (TOK_TABLE_OR_COL locid) 30)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + <= + TOK_TABLE_OR_COL + locid + 30 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2224,40 +2334,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (locid <= 30) - type: boolean - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: (locid <= 30) (type: boolean) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2336,7 +2431,25 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (>= (TOK_TABLE_OR_COL locid) 30)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + >= + TOK_TABLE_OR_COL + locid + 30 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2345,40 +2458,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (locid >= 30) - type: boolean - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: (locid >= 30) (type: boolean) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index 0aa57b6..e55c35b 100644 --- ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -55,7 +55,20 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -67,22 +80,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- partial column stats @@ -122,7 +125,60 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state) a) (TOK_SELEXPR (TOK_TABLE_OR_COL locid) b) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) c)) (TOK_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) sq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL c)) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b)))) (TOK_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL c)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + a + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + c + TOK_GROUPBY + TOK_TABLE_OR_COL + state + TOK_TABLE_OR_COL + locid + sq1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + a + TOK_SELEXPR + TOK_TABLE_OR_COL + c + TOK_SELEXPR + TOK_FUNCTION + min + TOK_TABLE_OR_COL + b + TOK_GROUPBY + TOK_TABLE_OR_COL + a + TOK_TABLE_OR_COL + c + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -132,53 +188,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - sq1:loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int + expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: state - type: string - expr: locid - type: int + aggregations: count() + keys: state (type: string), locid (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: int + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: int - Statistics: - numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: PARTIAL + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: - expr: _col2 - type: bigint + value expressions: _col2 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -231,42 +262,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 4 dataSize: 344 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 4 dataSize: 344 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: - expr: min(_col1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col2 - type: bigint + aggregations: min(_col1) + keys: _col0 (type: string), _col2 (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 4 dataSize: 376 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 @@ -287,28 +297,16 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: bigint - Statistics: - numRows: 4 dataSize: 376 basicStatsState: COMPLETE colStatsState: PARTIAL + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL tag: -1 - value expressions: - expr: _col2 - type: int + value expressions: _col2 (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -337,36 +335,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: min(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: bigint + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 2 dataSize: 188 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: int + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 2 dataSize: 196 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 196 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -414,7 +397,24 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL year))) (TOK_GROUPBY (TOK_TABLE_OR_COL year)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + year + TOK_GROUPBY + TOK_TABLE_OR_COL + year + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -423,39 +423,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: year - type: int + expressions: year (type: int) outputColumnNames: year - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - bucketGroup: false - keys: - expr: year - type: int + keys: year (type: int) mode: hash outputColumnNames: _col0 - Statistics: - numRows: 8 dataSize: 28 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - Statistics: - numRows: 8 dataSize: 28 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 Path -> Alias: #### A masked pattern was here #### @@ -509,28 +495,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -564,7 +542,29 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_GROUPBY + TOK_TABLE_OR_COL + state + TOK_TABLE_OR_COL + locid + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -573,47 +573,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int + expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - bucketGroup: false - keys: - expr: state - type: string - expr: locid - type: int + keys: state (type: string), locid (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: - numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: int + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: int - Statistics: - numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 Path -> Alias: #### A masked pattern was here #### @@ -667,32 +645,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: int + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -724,7 +690,29 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_CUBE_GROUPBY + TOK_TABLE_OR_COL + state + TOK_TABLE_OR_COL + locid + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -733,53 +721,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int + expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - bucketGroup: false - keys: - expr: state - type: string - expr: locid - type: int - expr: '0' - type: string + keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 32 dataSize: 3184 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string - Statistics: - numRows: 32 dataSize: 3184 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 Path -> Alias: #### A masked pattern was here #### @@ -833,34 +793,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: int - expr: KEY._col2 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 16 dataSize: 2800 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 16 dataSize: 1440 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 16 dataSize: 1440 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -892,7 +838,29 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_ROLLUP_GROUPBY + TOK_TABLE_OR_COL + state + TOK_TABLE_OR_COL + locid + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -901,53 +869,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int + expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - bucketGroup: false - keys: - expr: state - type: string - expr: locid - type: int - expr: '0' - type: string + keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 24 dataSize: 2388 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string - Statistics: - numRows: 24 dataSize: 2388 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 Path -> Alias: #### A masked pattern was here #### @@ -1001,34 +941,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: int - expr: KEY._col2 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 12 dataSize: 2100 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 12 dataSize: 1080 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 12 dataSize: 1080 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1060,7 +986,32 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_GROUPING_SETS + TOK_TABLE_OR_COL + state + TOK_TABLE_OR_COL + locid + TOK_GROUPING_SETS_EXPRESSION + TOK_TABLE_OR_COL + state + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1069,53 +1020,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int + expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - bucketGroup: false - keys: - expr: state - type: string - expr: locid - type: int - expr: '0' - type: string + keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 Path -> Alias: #### A masked pattern was here #### @@ -1169,34 +1092,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: int - expr: KEY._col2 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 4 dataSize: 700 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 700 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1228,7 +1137,35 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL locid))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_GROUPING_SETS + TOK_TABLE_OR_COL + state + TOK_TABLE_OR_COL + locid + TOK_GROUPING_SETS_EXPRESSION + TOK_TABLE_OR_COL + state + TOK_GROUPING_SETS_EXPRESSION + TOK_TABLE_OR_COL + locid + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1237,53 +1174,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int + expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - bucketGroup: false - keys: - expr: state - type: string - expr: locid - type: int - expr: '0' - type: string + keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string - Statistics: - numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 Path -> Alias: #### A masked pattern was here #### @@ -1337,34 +1246,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: int - expr: KEY._col2 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 8 dataSize: 1400 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1396,7 +1291,36 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL locid)) TOK_GROUPING_SETS_EXPRESSION))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_GROUPING_SETS + TOK_TABLE_OR_COL + state + TOK_TABLE_OR_COL + locid + TOK_GROUPING_SETS_EXPRESSION + TOK_TABLE_OR_COL + state + TOK_GROUPING_SETS_EXPRESSION + TOK_TABLE_OR_COL + locid + TOK_GROUPING_SETS_EXPRESSION + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1405,53 +1329,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int + expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - bucketGroup: false - keys: - expr: state - type: string - expr: locid - type: int - expr: '0' - type: string + keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 24 dataSize: 2388 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string - Statistics: - numRows: 24 dataSize: 2388 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 Path -> Alias: #### A masked pattern was here #### @@ -1505,34 +1401,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: int - expr: KEY._col2 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 12 dataSize: 2100 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 12 dataSize: 1080 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 12 dataSize: 1080 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1564,7 +1446,41 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL locid)) TOK_GROUPING_SETS_EXPRESSION))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_GROUPING_SETS + TOK_TABLE_OR_COL + state + TOK_TABLE_OR_COL + locid + TOK_GROUPING_SETS_EXPRESSION + TOK_TABLE_OR_COL + state + TOK_TABLE_OR_COL + locid + TOK_GROUPING_SETS_EXPRESSION + TOK_TABLE_OR_COL + state + TOK_GROUPING_SETS_EXPRESSION + TOK_TABLE_OR_COL + locid + TOK_GROUPING_SETS_EXPRESSION + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1573,53 +1489,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int + expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - bucketGroup: false - keys: - expr: state - type: string - expr: locid - type: int - expr: '0' - type: string + keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 32 dataSize: 3184 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string - Statistics: - numRows: 32 dataSize: 3184 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 Path -> Alias: #### A masked pattern was here #### @@ -1673,34 +1561,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: int - expr: KEY._col2 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 16 dataSize: 2800 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 16 dataSize: 1440 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 16 dataSize: 1440 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1734,7 +1608,24 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL year))) (TOK_GROUPBY (TOK_TABLE_OR_COL year)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + year + TOK_GROUPBY + TOK_TABLE_OR_COL + year + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1743,39 +1634,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: year - type: int + expressions: year (type: int) outputColumnNames: year - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - bucketGroup: false - keys: - expr: year - type: int + keys: year (type: int) mode: hash outputColumnNames: _col0 - Statistics: - numRows: 80 dataSize: 280 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 80 Data size: 280 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - Statistics: - numRows: 80 dataSize: 280 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80 Data size: 280 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 Path -> Alias: #### A masked pattern was here #### @@ -1829,28 +1706,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1882,7 +1751,29 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_CUBE_GROUPBY + TOK_TABLE_OR_COL + state + TOK_TABLE_OR_COL + locid + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1891,53 +1782,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int + expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - bucketGroup: false - keys: - expr: state - type: string - expr: locid - type: int - expr: '0' - type: string + keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 320 dataSize: 31840 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string - Statistics: - numRows: 320 dataSize: 31840 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 Path -> Alias: #### A masked pattern was here #### @@ -1991,34 +1854,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: int - expr: KEY._col2 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 42 dataSize: 7350 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 42 Data size: 7350 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 42 dataSize: 3780 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 42 Data size: 3780 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 42 dataSize: 3780 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 42 Data size: 3780 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/annotate_stats_join.q.out ql/src/test/results/clientpositive/annotate_stats_join.q.out index 09cfb3a..523d386 100644 --- ql/src/test/results/clientpositive/annotate_stats_join.q.out +++ ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -221,7 +221,35 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + emp_orc + e + TOK_TABREF + TOK_TABNAME + dept_orc + d + = + . + TOK_TABLE_OR_COL + e + deptid + . + TOK_TABLE_OR_COL + d + deptid + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -230,51 +258,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - d + Map Operator Tree: TableScan alias: d - Statistics: - numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: deptid - type: int + key expressions: deptid (type: int) sort order: + - Map-reduce partition columns: - expr: deptid - type: int - Statistics: - numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 - value expressions: - expr: deptid - type: int - expr: deptname - type: string - e + value expressions: deptid (type: int), deptname (type: string) TableScan alias: e - Statistics: - numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: deptid - type: int + key expressions: deptid (type: int) sort order: + - Map-reduce partition columns: - expr: deptid - type: int - Statistics: - numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 - value expressions: - expr: lastname - type: string - expr: deptid - type: int + value expressions: lastname (type: string), deptid (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -377,30 +383,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 - Statistics: - numRows: 4 dataSize: 760 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 4 dataSize: 760 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 4 dataSize: 760 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -440,7 +434,49 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid))) (TOK_TABREF (TOK_TABNAME emp_orc) e1) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL e1) deptid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + emp_orc + e + TOK_TABREF + TOK_TABNAME + dept_orc + d + = + . + TOK_TABLE_OR_COL + e + deptid + . + TOK_TABLE_OR_COL + d + deptid + TOK_TABREF + TOK_TABNAME + emp_orc + e1 + = + . + TOK_TABLE_OR_COL + e + deptid + . + TOK_TABLE_OR_COL + e1 + deptid + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -449,73 +485,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - d + Map Operator Tree: TableScan alias: d - Statistics: - numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: deptid - type: int + key expressions: deptid (type: int) sort order: + - Map-reduce partition columns: - expr: deptid - type: int - Statistics: - numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 - value expressions: - expr: deptid - type: int - expr: deptname - type: string - e + value expressions: deptid (type: int), deptname (type: string) TableScan - alias: e - Statistics: - numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + alias: e1 + Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: deptid - type: int + key expressions: deptid (type: int) sort order: + - Map-reduce partition columns: - expr: deptid - type: int - Statistics: - numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE - tag: 0 - value expressions: - expr: lastname - type: string - expr: deptid - type: int - e1 + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + tag: 2 + value expressions: lastname (type: string), deptid (type: int) TableScan - alias: e1 - Statistics: - numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + alias: e + Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: deptid - type: int + key expressions: deptid (type: int) sort order: + - Map-reduce partition columns: - expr: deptid - type: int - Statistics: - numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE - tag: 2 - value expressions: - expr: lastname - type: string - expr: deptid - type: int + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: lastname (type: string), deptid (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -620,34 +623,18 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Statistics: - numRows: 4 dataSize: 1136 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 1136 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: int + expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string), _col8 (type: string), _col9 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: - numRows: 4 dataSize: 1136 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 1136 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 4 dataSize: 1136 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 1136 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -685,7 +672,49 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid))) (TOK_TABREF (TOK_TABNAME loc_orc) l) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL l) locid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + emp_orc + e + TOK_TABREF + TOK_TABNAME + dept_orc + d + = + . + TOK_TABLE_OR_COL + e + deptid + . + TOK_TABLE_OR_COL + d + deptid + TOK_TABREF + TOK_TABNAME + loc_orc + l + = + . + TOK_TABLE_OR_COL + e + deptid + . + TOK_TABLE_OR_COL + l + locid + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -694,77 +723,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - d + Map Operator Tree: TableScan alias: d - Statistics: - numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: deptid - type: int + key expressions: deptid (type: int) sort order: + - Map-reduce partition columns: - expr: deptid - type: int - Statistics: - numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 - value expressions: - expr: deptid - type: int - expr: deptname - type: string - e + value expressions: deptid (type: int), deptname (type: string) TableScan alias: e - Statistics: - numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: deptid - type: int + key expressions: deptid (type: int) sort order: + - Map-reduce partition columns: - expr: deptid - type: int - Statistics: - numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 - value expressions: - expr: lastname - type: string - expr: deptid - type: int - l + value expressions: lastname (type: string), deptid (type: int) TableScan alias: l - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: locid - type: int + key expressions: locid (type: int) sort order: + - Map-reduce partition columns: - expr: locid - type: int - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: locid (type: int) + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE tag: 2 - value expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + value expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -914,38 +906,18 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11 - Statistics: - numRows: 5 dataSize: 1449 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 5 Data size: 1449 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: int - expr: _col10 - type: bigint - expr: _col11 - type: int + expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: bigint), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: - numRows: 5 dataSize: 1449 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 5 Data size: 1449 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 5 dataSize: 1449 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 5 Data size: 1449 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -985,7 +957,49 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid))) (TOK_TABREF (TOK_TABNAME loc_orc) l) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL l) state)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + emp_orc + e + TOK_TABREF + TOK_TABNAME + dept_orc + d + = + . + TOK_TABLE_OR_COL + e + deptid + . + TOK_TABLE_OR_COL + d + deptid + TOK_TABREF + TOK_TABNAME + loc_orc + l + = + . + TOK_TABLE_OR_COL + e + deptid + . + TOK_TABLE_OR_COL + l + state + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -994,77 +1008,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - d + Map Operator Tree: TableScan alias: d - Statistics: - numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: UDFToDouble(deptid) - type: double + key expressions: UDFToDouble(deptid) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(deptid) - type: double - Statistics: - numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: UDFToDouble(deptid) (type: double) + Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 - value expressions: - expr: deptid - type: int - expr: deptname - type: string - e + value expressions: deptid (type: int), deptname (type: string) TableScan alias: e - Statistics: - numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: UDFToDouble(deptid) - type: double + key expressions: UDFToDouble(deptid) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(deptid) - type: double - Statistics: - numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: UDFToDouble(deptid) (type: double) + Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 - value expressions: - expr: lastname - type: string - expr: deptid - type: int - l + value expressions: lastname (type: string), deptid (type: int) TableScan alias: l - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: UDFToDouble(state) - type: double + key expressions: UDFToDouble(state) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(state) - type: double - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: UDFToDouble(state) (type: double) + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE tag: 2 - value expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + value expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1214,38 +1191,18 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11 - Statistics: - numRows: 4 dataSize: 1156 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 1156 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: int - expr: _col10 - type: bigint - expr: _col11 - type: int + expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: bigint), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: - numRows: 4 dataSize: 1156 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 1156 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 4 dataSize: 1156 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 1156 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1285,7 +1242,45 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (and (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid)) (= (. (TOK_TABLE_OR_COL e) lastname) (. (TOK_TABLE_OR_COL d) deptname))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + emp_orc + e + TOK_TABREF + TOK_TABNAME + dept_orc + d + and + = + . + TOK_TABLE_OR_COL + e + deptid + . + TOK_TABLE_OR_COL + d + deptid + = + . + TOK_TABLE_OR_COL + e + lastname + . + TOK_TABLE_OR_COL + d + deptname + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1294,59 +1289,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - d + Map Operator Tree: TableScan alias: d - Statistics: - numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: deptid - type: int - expr: deptname - type: string + key expressions: deptid (type: int), deptname (type: string) sort order: ++ - Map-reduce partition columns: - expr: deptid - type: int - expr: deptname - type: string - Statistics: - numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: deptid (type: int), deptname (type: string) + Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 - value expressions: - expr: deptid - type: int - expr: deptname - type: string - e + value expressions: deptid (type: int), deptname (type: string) TableScan alias: e - Statistics: - numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: deptid - type: int - expr: lastname - type: string + key expressions: deptid (type: int), lastname (type: string) sort order: ++ - Map-reduce partition columns: - expr: deptid - type: int - expr: lastname - type: string - Statistics: - numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: deptid (type: int), lastname (type: string) + Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 - value expressions: - expr: lastname - type: string - expr: deptid - type: int + value expressions: lastname (type: string), deptid (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1449,30 +1414,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1512,7 +1465,69 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (and (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid)) (= (. (TOK_TABLE_OR_COL e) lastname) (. (TOK_TABLE_OR_COL d) deptname)))) (TOK_TABREF (TOK_TABNAME loc_orc) l) (and (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL l) locid)) (= (. (TOK_TABLE_OR_COL e) lastname) (. (TOK_TABLE_OR_COL l) state))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + emp_orc + e + TOK_TABREF + TOK_TABNAME + dept_orc + d + and + = + . + TOK_TABLE_OR_COL + e + deptid + . + TOK_TABLE_OR_COL + d + deptid + = + . + TOK_TABLE_OR_COL + e + lastname + . + TOK_TABLE_OR_COL + d + deptname + TOK_TABREF + TOK_TABNAME + loc_orc + l + and + = + . + TOK_TABLE_OR_COL + e + deptid + . + TOK_TABLE_OR_COL + l + locid + = + . + TOK_TABLE_OR_COL + e + lastname + . + TOK_TABLE_OR_COL + l + state + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1521,89 +1536,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - d + Map Operator Tree: TableScan alias: d - Statistics: - numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: deptid - type: int - expr: deptname - type: string + key expressions: deptid (type: int), deptname (type: string) sort order: ++ - Map-reduce partition columns: - expr: deptid - type: int - expr: deptname - type: string - Statistics: - numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: deptid (type: int), deptname (type: string) + Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 - value expressions: - expr: deptid - type: int - expr: deptname - type: string - e + value expressions: deptid (type: int), deptname (type: string) TableScan alias: e - Statistics: - numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: deptid - type: int - expr: lastname - type: string + key expressions: deptid (type: int), lastname (type: string) sort order: ++ - Map-reduce partition columns: - expr: deptid - type: int - expr: lastname - type: string - Statistics: - numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: deptid (type: int), lastname (type: string) + Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 - value expressions: - expr: lastname - type: string - expr: deptid - type: int - l + value expressions: lastname (type: string), deptid (type: int) TableScan alias: l - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: locid - type: int - expr: state - type: string + key expressions: locid (type: int), state (type: string) sort order: ++ - Map-reduce partition columns: - expr: locid - type: int - expr: state - type: string - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: locid (type: int), state (type: string) + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE tag: 2 - value expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + value expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1753,38 +1719,18 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: int - expr: _col10 - type: bigint - expr: _col11 - type: int + expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: bigint), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/annotate_stats_limit.q.out ql/src/test/results/clientpositive/annotate_stats_limit.q.out index f45b684..e6db870 100644 --- ql/src/test/results/clientpositive/annotate_stats_limit.q.out +++ ql/src/test/results/clientpositive/annotate_stats_limit.q.out @@ -67,7 +67,20 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -79,22 +92,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: -- numRows: 4 rawDataSize: 396 @@ -108,7 +111,22 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 4))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_LIMIT + 4 + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -120,25 +138,15 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 4 dataSize: 396 basicStatsState: COMPLETE colStatsState: COMPLETE + Number of rows: 4 + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: -- greater than the available number of rows @@ -154,7 +162,22 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 16))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_LIMIT + 16 + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -166,25 +189,15 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Number of rows: 16 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: -- numRows: 0 rawDataSize: 0 @@ -198,7 +211,22 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 0))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_LIMIT + 0 + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -210,24 +238,14 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE ListSink diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out index 0a78910..2a56d6e 100644 --- ql/src/test/results/clientpositive/annotate_stats_part.q.out +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -39,7 +39,20 @@ POSTHOOK: query: -- basicStatState: NONE colStatState: NONE explain extended select * from loc_orc POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -51,22 +64,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: string + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE ListSink PREHOOK: query: insert overwrite table loc_orc partition(year) select * from loc_staging @@ -101,7 +104,20 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -192,22 +208,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 5 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 5 Data size: 727 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: string + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 5 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 5 Data size: 727 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- partition level analyze statistics for specific parition @@ -243,7 +249,25 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__')))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + TOK_TABLE_OR_COL + year + '__HIVE_DEFAULT_PARTITION__' + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -295,22 +319,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 2 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2 Data size: 325 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: string + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 2 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2 Data size: 325 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: PARTIAL colStatState: NONE @@ -326,7 +340,20 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -417,22 +444,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 9 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 9 Data size: 727 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: string + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 9 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 9 Data size: 727 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE @@ -448,7 +465,25 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL year) '2001')))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + TOK_TABLE_OR_COL + year + '2001' + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -500,22 +535,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 7 dataSize: 402 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: string + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 7 dataSize: 402 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- partition level analyze statistics for all partitions @@ -555,7 +580,25 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__')))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + TOK_TABLE_OR_COL + year + '__HIVE_DEFAULT_PARTITION__' + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -607,22 +650,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: string + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE @@ -638,7 +671,20 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -729,22 +775,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: string + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE @@ -760,7 +796,30 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (= (TOK_TABLE_OR_COL year) '2001') (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + or + = + TOK_TABLE_OR_COL + year + '2001' + = + TOK_TABLE_OR_COL + year + '__HIVE_DEFAULT_PARTITION__' + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -851,22 +910,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: string + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- both partitions will be pruned @@ -884,7 +933,30 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL year) '2001') (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + = + TOK_TABLE_OR_COL + year + '2001' + = + TOK_TABLE_OR_COL + year + '__HIVE_DEFAULT_PARTITION__' + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -896,29 +968,16 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((year = '2001') and (year = '__HIVE_DEFAULT_PARTITION__')) - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + predicate: ((year = '2001') and (year = '__HIVE_DEFAULT_PARTITION__')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: string + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE ListSink PREHOOK: query: -- partition level partial column statistics @@ -952,7 +1011,21 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL zip))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + zip + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -961,27 +1034,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: zip - type: bigint + expressions: zip (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1103,7 +1170,21 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1112,27 +1193,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Select Operator - expressions: - expr: state - type: string + expressions: state (type: string) outputColumnNames: _col0 - Statistics: - numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1256,7 +1331,24 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1265,29 +1357,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int + expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1409,7 +1493,29 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_WHERE (= (TOK_TABLE_OR_COL year) '2001')))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_WHERE + = + TOK_TABLE_OR_COL + year + '2001' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1418,29 +1524,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 7 dataSize: 402 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int + expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 7 dataSize: 630 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 7 Data size: 630 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 7 dataSize: 630 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 7 Data size: 630 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1520,7 +1618,29 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_WHERE (!= (TOK_TABLE_OR_COL year) '2001')))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_WHERE + != + TOK_TABLE_OR_COL + year + '2001' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1529,29 +1649,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int + expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1631,7 +1743,20 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1722,22 +1847,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: string + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- This is to test filter expression evaluation on partition column @@ -1755,7 +1870,31 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_WHERE (and (> (TOK_TABLE_OR_COL locid) 0) (= (TOK_TABLE_OR_COL year) '2001'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_WHERE + and + > + TOK_TABLE_OR_COL + locid + 0 + = + TOK_TABLE_OR_COL + year + '2001' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1764,34 +1903,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 7 dataSize: 402 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (locid > 0) - type: boolean - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: (locid > 0) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: locid - type: int + expressions: locid (type: int) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1869,7 +1999,34 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL locid)) (TOK_SELEXPR (TOK_TABLE_OR_COL year))) (TOK_WHERE (and (> (TOK_TABLE_OR_COL locid) 0) (= (TOK_TABLE_OR_COL year) '2001'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_SELEXPR + TOK_TABLE_OR_COL + year + TOK_WHERE + and + > + TOK_TABLE_OR_COL + locid + 0 + = + TOK_TABLE_OR_COL + year + '2001' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1878,36 +2035,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 7 dataSize: 402 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (locid > 0) - type: boolean - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: (locid > 0) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: locid - type: int - expr: year - type: string + expressions: locid (type: int), year (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 2 dataSize: 376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1985,7 +2131,45 @@ POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMP POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL locid)) (TOK_SELEXPR (TOK_TABLE_OR_COL year))))) test)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL locid) 0) (= (TOK_TABLE_OR_COL year) '2001'))))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_SELEXPR + TOK_TABLE_OR_COL + year + test + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + > + TOK_TABLE_OR_COL + locid + 0 + = + TOK_TABLE_OR_COL + year + '2001' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1994,36 +2178,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test:loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 7 dataSize: 402 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (locid > 0) - type: boolean - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: (locid > 0) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: locid - type: int - expr: year - type: string + expressions: locid (type: int), year (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/annotate_stats_select.q.out ql/src/test/results/clientpositive/annotate_stats_select.q.out index 480f5cf..023b1c3 100644 --- ql/src/test/results/clientpositive/annotate_stats_select.q.out +++ ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -103,7 +103,20 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -115,44 +128,12 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: bo1 - type: boolean - expr: ti1 - type: tinyint - expr: si1 - type: smallint - expr: i1 - type: int - expr: bi1 - type: bigint - expr: f1 - type: float - expr: d1 - type: double - expr: de1 - type: decimal(10,0) - expr: ts1 - type: timestamp - expr: da1 - type: timestamp - expr: s1 - type: string - expr: vc1 - type: varchar(5) - expr: m1 - type: map - expr: l1 - type: array - expr: st1 - type: struct + expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: map), l1 (type: array), st1 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- statistics for complex types are not supported yet @@ -202,7 +183,20 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -214,44 +208,12 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Select Operator - expressions: - expr: bo1 - type: boolean - expr: ti1 - type: tinyint - expr: si1 - type: smallint - expr: i1 - type: int - expr: bi1 - type: bigint - expr: f1 - type: float - expr: d1 - type: double - expr: de1 - type: decimal(10,0) - expr: ts1 - type: timestamp - expr: da1 - type: timestamp - expr: s1 - type: string - expr: vc1 - type: varchar(5) - expr: m1 - type: map - expr: l1 - type: array - expr: st1 - type: struct + expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: map), l1 (type: array), st1 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 8 @@ -276,7 +238,21 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bo1))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + bo1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -285,27 +261,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: bo1 - type: boolean + expressions: bo1 (type: boolean) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -403,7 +373,22 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1) int1)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + i1 + int1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -412,27 +397,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: i1 - type: int + expressions: i1 (type: int) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -528,7 +507,21 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL s1))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + s1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -537,27 +530,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: s1 - type: string + expressions: s1 (type: string) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 174 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 174 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -655,7 +642,21 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + m1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -664,27 +665,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: m1 - type: map + expressions: m1 (type: map) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -780,7 +775,42 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bo1)) (TOK_SELEXPR (TOK_TABLE_OR_COL ti1)) (TOK_SELEXPR (TOK_TABLE_OR_COL si1)) (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR (TOK_TABLE_OR_COL bi1)) (TOK_SELEXPR (TOK_TABLE_OR_COL f1)) (TOK_SELEXPR (TOK_TABLE_OR_COL d1)) (TOK_SELEXPR (TOK_TABLE_OR_COL s1))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + bo1 + TOK_SELEXPR + TOK_TABLE_OR_COL + ti1 + TOK_SELEXPR + TOK_TABLE_OR_COL + si1 + TOK_SELEXPR + TOK_TABLE_OR_COL + i1 + TOK_SELEXPR + TOK_TABLE_OR_COL + bi1 + TOK_SELEXPR + TOK_TABLE_OR_COL + f1 + TOK_SELEXPR + TOK_TABLE_OR_COL + d1 + TOK_SELEXPR + TOK_TABLE_OR_COL + s1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -789,41 +819,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: bo1 - type: boolean - expr: ti1 - type: tinyint - expr: si1 - type: smallint - expr: i1 - type: int - expr: bi1 - type: bigint - expr: f1 - type: float - expr: d1 - type: double - expr: s1 - type: string + expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: double), s1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: - numRows: 2 dataSize: 246 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 246 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 246 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 246 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -919,7 +929,20 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_NULL)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_NULL + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -928,27 +951,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: null - type: string + expressions: null (type: string) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1044,7 +1061,20 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 11)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + 11 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1053,27 +1083,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: 11 - type: int + expressions: 11 (type: int) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1169,7 +1193,20 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 11L)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + 11L + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1178,27 +1215,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: 11 - type: bigint + expressions: 11 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1294,7 +1325,20 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 11.0)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + 11.0 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1303,27 +1347,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: 11.0 - type: double + expressions: 11.0 (type: double) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1419,7 +1457,20 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR "hello")))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + "hello" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1428,27 +1479,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: 'hello' - type: string + expressions: 'hello' (type: string) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1542,7 +1587,23 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION (TOK_CHAR 5) "hello"))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + TOK_CHAR + 5 + "hello" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1551,27 +1612,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: CAST( 'hello' AS CHAR(5) - type: char(5) + expressions: CAST( 'hello' AS CHAR(5) (type: char(5)) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1665,7 +1720,23 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION (TOK_VARCHAR 5) "hello"))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + TOK_VARCHAR + 5 + "hello" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1674,27 +1745,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: CAST( 'hello' AS varchar(5)) - type: varchar(5) + expressions: CAST( 'hello' AS varchar(5)) (type: varchar(5)) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1790,7 +1855,22 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION unbase64 "0xe23"))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + unbase64 + "0xe23" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1799,27 +1879,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: unbase64('0xe23') - type: binary + expressions: unbase64('0xe23') (type: binary) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 96 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 96 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1915,7 +1989,26 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_TINYINT "1")) (TOK_SELEXPR (TOK_FUNCTION TOK_SMALLINT "20"))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + TOK_TINYINT + "1" + TOK_SELEXPR + TOK_FUNCTION + TOK_SMALLINT + "20" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1924,29 +2017,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: UDFToByte('1') - type: tinyint - expr: UDFToShort('20') - type: smallint + expressions: UDFToByte('1') (type: tinyint), UDFToShort('20') (type: smallint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2042,7 +2127,22 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP "1970-12-31 15:59:58.174"))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + TOK_TIMESTAMP + "1970-12-31 15:59:58.174" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2051,27 +2151,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: CAST( '1970-12-31 15:59:58.174' AS TIMESTAMP) - type: timestamp + expressions: CAST( '1970-12-31 15:59:58.174' AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 80 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 80 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2167,7 +2261,22 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DATE "1970-12-31 15:59:58.174"))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + TOK_DATE + "1970-12-31 15:59:58.174" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2176,27 +2285,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: CAST( '1970-12-31 15:59:58.174' AS DATE) - type: date + expressions: CAST( '1970-12-31 15:59:58.174' AS DATE) (type: date) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2292,7 +2395,22 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DECIMAL "58.174"))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + TOK_DECIMAL + "58.174" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2301,27 +2419,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: CAST( '58.174' AS decimal(10,0)) - type: decimal(10,0) + expressions: CAST( '58.174' AS decimal(10,0)) (type: decimal(10,0)) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 224 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 224 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2417,7 +2529,24 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION array 1 2 3))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + array + 1 + 2 + 3 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2426,27 +2555,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: array(1,2,3) - type: array + expressions: array(1,2,3) (type: array) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2542,7 +2665,24 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION str_to_map "a=1 b=2 c=3" " " "="))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + str_to_map + "a=1 b=2 c=3" + " " + "=" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2551,27 +2691,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: str_to_map('a=1 b=2 c=3',' ','=') - type: map + expressions: str_to_map('a=1 b=2 c=3',' ','=') (type: map) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 1508 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 1508 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2667,7 +2801,25 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION NAMED_STRUCT "a" 11 "b" 11))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + NAMED_STRUCT + "a" + 11 + "b" + 11 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2676,27 +2828,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: named_struct('a',11,'b',11) - type: struct + expressions: named_struct('a',11,'b',11) (type: struct) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2792,7 +2938,23 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION CREATE_UNION 0 "hello"))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + CREATE_UNION + 0 + "hello" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2801,27 +2963,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: create_union(0,'hello') - type: uniontype + expressions: create_union(0,'hello') (type: uniontype) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 250 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 250 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 250 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2919,7 +3075,21 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2928,32 +3098,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3010,27 +3171,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3075,7 +3229,22 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + count + 1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -3084,32 +3253,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3166,27 +3326,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3231,7 +3384,22 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR 11)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_SELEXPR + 11 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -3240,57 +3408,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Select Operator - expressions: - expr: bo1 - type: boolean - expr: ti1 - type: tinyint - expr: si1 - type: smallint - expr: i1 - type: int - expr: bi1 - type: bigint - expr: f1 - type: float - expr: d1 - type: double - expr: de1 - type: decimal(10,0) - expr: ts1 - type: timestamp - expr: da1 - type: timestamp - expr: s1 - type: string - expr: vc1 - type: varchar(5) - expr: m1 - type: map - expr: l1 - type: array - expr: st1 - type: struct - expr: 11 - type: int + expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: map), l1 (type: array), st1 (type: struct), 11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: - numRows: 2 dataSize: 428 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 2 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 428 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 2 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3390,7 +3522,35 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1))) (TOK_LIMIT 10))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1))))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + i1 + TOK_LIMIT + 10 + temp + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + i1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -3399,31 +3559,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - temp:alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: i1 - type: int + expressions: i1 (type: int) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Number of rows: 10 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: int + value expressions: _col0 (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3480,25 +3632,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Number of rows: 10 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3543,7 +3690,37 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR 11)) (TOK_LIMIT 10))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1))))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + i1 + TOK_SELEXPR + 11 + TOK_LIMIT + 10 + temp + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + i1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -3552,35 +3729,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - temp:alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: i1 - type: int - expr: 11 - type: int + expressions: i1 (type: int), 11 (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Number of rows: 10 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int + value expressions: _col0 (type: int), _col1 (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3637,25 +3802,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Number of rows: 10 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3700,7 +3860,39 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR 11)) (TOK_LIMIT 10))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR "hello")))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + i1 + TOK_SELEXPR + 11 + TOK_LIMIT + 10 + temp + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + i1 + TOK_SELEXPR + "hello" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -3709,35 +3901,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - temp:alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: i1 - type: int - expr: 11 - type: int + expressions: i1 (type: int), 11 (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Number of rows: 10 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int + value expressions: _col0 (type: int), _col1 (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3794,27 +3974,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Number of rows: 10 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: int - expr: 'hello' - type: string + expressions: _col0 (type: int), 'hello' (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3859,7 +4032,38 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR 11.0 x)) (TOK_LIMIT 10))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x))))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + i1 + TOK_SELEXPR + 11.0 + x + TOK_LIMIT + 10 + temp + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + x + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -3868,35 +4072,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - temp:alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: i1 - type: int - expr: 11.0 - type: double + expressions: i1 (type: int), 11.0 (type: double) outputColumnNames: _col0, _col1 - Statistics: - numRows: 2 dataSize: 24 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 2 dataSize: 24 basicStatsState: COMPLETE colStatsState: COMPLETE + Number of rows: 10 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 2 dataSize: 24 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: double + value expressions: _col0 (type: int), _col1 (type: double) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3953,25 +4145,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 2 dataSize: 24 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 2 dataSize: 24 basicStatsState: COMPLETE colStatsState: COMPLETE + Number of rows: 10 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col1 - type: double + expressions: _col1 (type: double) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4016,7 +4203,43 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1) x) (TOK_SELEXPR (TOK_FUNCTION unbase64 "0xe23") ub)) (TOK_LIMIT 10))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x)) (TOK_SELEXPR "hello")))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + i1 + x + TOK_SELEXPR + TOK_FUNCTION + unbase64 + "0xe23" + ub + TOK_LIMIT + 10 + temp + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + x + TOK_SELEXPR + "hello" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -4025,35 +4248,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - temp:alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: i1 - type: int - expr: unbase64('0xe23') - type: binary + expressions: i1 (type: int), unbase64('0xe23') (type: binary) outputColumnNames: _col0, _col1 - Statistics: - numRows: 2 dataSize: 104 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 2 dataSize: 104 basicStatsState: COMPLETE colStatsState: COMPLETE + Number of rows: 10 + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 2 dataSize: 104 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: binary + value expressions: _col0 (type: int), _col1 (type: binary) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4110,27 +4321,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 2 dataSize: 104 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 2 dataSize: 104 basicStatsState: COMPLETE colStatsState: COMPLETE + Number of rows: 10 + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: int - expr: 'hello' - type: string + expressions: _col0 (type: int), 'hello' (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4177,7 +4381,55 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR "hello" hell)) (TOK_LIMIT 10))) in1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL hell) h)) (TOK_LIMIT 10))) in2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL h)) (TOK_SELEXPR 11.0)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + i1 + TOK_SELEXPR + "hello" + hell + TOK_LIMIT + 10 + in1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + hell + h + TOK_LIMIT + 10 + in2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + h + TOK_SELEXPR + 11.0 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -4187,35 +4439,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - in2:in1:alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: i1 - type: int - expr: 'hello' - type: string + expressions: i1 (type: int), 'hello' (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Number of rows: 10 + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + value expressions: _col0 (type: int), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4272,21 +4512,17 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Number of rows: 10 + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Number of rows: 10 + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 @@ -4307,18 +4543,14 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Reduce Output Operator sort order: - Statistics: - numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: string + value expressions: _col0 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4347,27 +4579,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Limit - Statistics: - numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Number of rows: 10 + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: 11.0 - type: double + expressions: _col0 (type: string), 11.0 (type: double) outputColumnNames: _col0, _col1 - Statistics: - numRows: 2 dataSize: 194 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 194 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4412,7 +4637,24 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bo1))) (TOK_WHERE (TOK_TABLE_OR_COL bo1)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + bo1 + TOK_WHERE + TOK_TABLE_OR_COL + bo1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -4421,34 +4663,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: bo1 - type: boolean - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + predicate: bo1 (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: bo1 - type: boolean + expressions: bo1 (type: boolean) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4544,7 +4777,25 @@ POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name: POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypes_orc.vc1 SIMPLE [(alltypes)alltypes.FieldSchema(name:vc1, type:varchar(5), comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bo1))) (TOK_WHERE (! (TOK_TABLE_OR_COL bo1))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + alltypes_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + bo1 + TOK_WHERE + ! + TOK_TABLE_OR_COL + bo1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -4553,34 +4804,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypes_orc + Map Operator Tree: TableScan alias: alltypes_orc - Statistics: - numRows: 2 dataSize: 1686 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (not bo1) - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + predicate: (not bo1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator - expressions: - expr: bo1 - type: boolean + expressions: bo1 (type: boolean) outputColumnNames: _col0 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/annotate_stats_table.q.out ql/src/test/results/clientpositive/annotate_stats_table.q.out index 56ff79e..89fa6b1 100644 --- ql/src/test/results/clientpositive/annotate_stats_table.q.out +++ ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -29,7 +29,20 @@ POSTHOOK: query: -- basicStatState: NONE colStatState: NONE explain extended select * from emp_orc POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + emp_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -41,18 +54,12 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: lastname - type: string - expr: deptid - type: int + expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE ListSink PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging @@ -84,7 +91,20 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + emp_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -96,18 +116,12 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: - numRows: 3 dataSize: 349 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 349 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: lastname - type: string - expr: deptid - type: int + expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 349 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 349 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- table level analyze statistics @@ -131,7 +145,20 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + emp_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -143,18 +170,12 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: - numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: lastname - type: string - expr: deptid - type: int + expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- column level partial statistics @@ -178,7 +199,20 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + emp_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -190,18 +224,12 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: - numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Select Operator - expressions: - expr: lastname - type: string - expr: deptid - type: int + expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: PARTIAL + Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- all selected columns have statistics @@ -215,7 +243,21 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL deptid))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + emp_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + deptid + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -224,27 +266,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - emp_orc + Map Operator Tree: TableScan alias: emp_orc - Statistics: - numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: deptid - type: int + expressions: deptid (type: int) outputColumnNames: _col0 - Statistics: - numRows: 6 dataSize: 20 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 6 dataSize: 20 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -335,7 +371,20 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + emp_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -347,18 +396,12 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: - numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: lastname - type: string - expr: deptid - type: int + expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE @@ -370,7 +413,21 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL lastname))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + emp_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + lastname + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -379,27 +436,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - emp_orc + Map Operator Tree: TableScan alias: emp_orc - Statistics: - numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: lastname - type: string + expressions: lastname (type: string) outputColumnNames: _col0 - Statistics: - numRows: 6 dataSize: 546 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 6 dataSize: 546 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -478,7 +529,21 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL deptid))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + emp_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + deptid + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -487,27 +552,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - emp_orc + Map Operator Tree: TableScan alias: emp_orc - Statistics: - numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: deptid - type: int + expressions: deptid (type: int) outputColumnNames: _col0 - Statistics: - numRows: 6 dataSize: 20 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 6 dataSize: 20 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -586,7 +645,24 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL lastname)) (TOK_SELEXPR (TOK_TABLE_OR_COL deptid))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + emp_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + lastname + TOK_SELEXPR + TOK_TABLE_OR_COL + deptid + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -595,29 +671,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - emp_orc + Map Operator Tree: TableScan alias: emp_orc - Statistics: - numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 349 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: lastname - type: string - expr: deptid - type: int + expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 6 dataSize: 566 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 566 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 6 dataSize: 566 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 6 Data size: 566 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/annotate_stats_union.q.out ql/src/test/results/clientpositive/annotate_stats_union.q.out index 9dd2434..df1e386 100644 --- ql/src/test/results/clientpositive/annotate_stats_union.q.out +++ ql/src/test/results/clientpositive/annotate_stats_union.q.out @@ -67,7 +67,21 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -76,27 +90,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string + expressions: state (type: string) outputColumnNames: _col0 - Statistics: - numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -177,7 +185,46 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_UNION + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + tmp + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -186,37 +233,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:tmp-subquery1:loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string + expressions: state (type: string) outputColumnNames: _col0 - Statistics: - numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -232,36 +269,26 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - null-subquery2:tmp-subquery2:loc_orc TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string + expressions: state (type: string) outputColumnNames: _col0 - Statistics: - numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -342,7 +369,20 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -354,22 +394,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: -- numRows: 16 rawDataSize: 1592 @@ -383,7 +413,44 @@ POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(na POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_UNION + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + tmp + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -392,49 +459,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:tmp-subquery1:loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: - numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: bigint - expr: _col3 - type: int + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint), _col3 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -450,48 +495,26 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - null-subquery2:tmp-subquery2:loc_orc TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string - expr: locid - type: int - expr: zip - type: bigint - expr: year - type: int + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: - numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: bigint - expr: _col3 - type: int + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint), _col3 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -706,7 +729,48 @@ POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(nam POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME default loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)))))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_UNION + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + default + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + test + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + temp + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -715,37 +779,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:temp-subquery1:loc_orc + Map Operator Tree: TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string + expressions: state (type: string) outputColumnNames: _col0 - Statistics: - numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -761,36 +815,26 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - null-subquery2:temp-subquery2:loc_orc TableScan alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string + expressions: state (type: string) outputColumnNames: _col0 - Statistics: - numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -920,7 +964,48 @@ POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(nam POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test loc_staging))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)))))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_UNION + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + test + loc_staging + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + test + loc_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + temp + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -929,37 +1014,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:temp-subquery1:loc_staging + Map Operator Tree: TableScan - alias: loc_staging - Statistics: - numRows: 8 dataSize: 117 basicStatsState: COMPLETE colStatsState: COMPLETE + alias: loc_orc + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string + expressions: state (type: string) outputColumnNames: _col0 - Statistics: - numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -975,36 +1050,26 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - null-subquery2:temp-subquery2:loc_orc TableScan - alias: loc_orc - Statistics: - numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + alias: loc_staging + Statistics: Num rows: 8 Data size: 117 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: state - type: string + expressions: state (type: string) outputColumnNames: _col0 - Statistics: - numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 16 Data size: 1376 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/auto_smb_mapjoin_14.q.out index 8220c2c..2f18bbc 100644 --- ql/src/test/results/clientpositive/auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/auto_smb_mapjoin_14.q.out @@ -50,9 +50,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -60,49 +57,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 0 + 0 key (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -157,9 +142,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-3 depends on stages: Stage-1 @@ -168,68 +150,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -237,30 +197,21 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -343,9 +294,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) cnt1))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) cnt1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-3 depends on stages: Stage-1, Stage-5 @@ -356,68 +304,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -425,37 +349,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -463,20 +369,12 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -484,31 +382,16 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint + key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -516,68 +399,44 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - src2:subq2:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -659,9 +518,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -669,58 +525,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -785,9 +627,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -795,58 +634,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -935,9 +760,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq4) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL subq4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -945,58 +767,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1075,9 +883,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1085,58 +890,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 8) - type: boolean + predicate: (key < 8) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1191,9 +982,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) 1) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) 1) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1202,41 +990,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key + 1) - type: int + expressions: (key + 1) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - subq2:a + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key + 1) - type: int + expressions: (key + 1) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1244,17 +1022,16 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1262,30 +1039,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1336,9 +1108,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_TABREF (TOK_TABNAME tbl2) a) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL a) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1346,58 +1115,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1454,9 +1209,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq3) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1464,19 +1216,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -1485,40 +1235,28 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - 2 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1591,9 +1329,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) value2)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1601,58 +1336,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:subq2:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1737,9 +1458,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val1)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val1)) (TOK_SELEXPR (TOK_TABLE_OR_COL val2))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 @@ -1760,58 +1478,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 2 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1842,12 +1540,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1856,12 +1552,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1898,12 +1592,10 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1912,12 +1604,10 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2095,9 +1785,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val1)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 @@ -2113,90 +1800,57 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 2 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2227,12 +1881,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2241,12 +1893,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out index 2f03c0b..e84e7b2 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out @@ -84,7 +84,36 @@ POSTHOOK: query: -- Since size is being used to find the big table, the order of explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -93,12 +122,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 116 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -106,24 +133,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -221,15 +243,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -278,7 +296,36 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_big) a) (TOK_TABREF (TOK_TABNAME bucket_small) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -287,12 +334,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 116 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -300,24 +345,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -415,15 +455,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -472,7 +508,36 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_big) a) (TOK_TABREF (TOK_TABNAME bucket_small) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-3, Stage-4, Stage-1 @@ -487,8 +552,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a GatherStats: false @@ -498,24 +562,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -710,15 +769,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -743,8 +798,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b GatherStats: false @@ -754,24 +808,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -1009,15 +1058,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -1042,12 +1087,10 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 116 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -1055,24 +1098,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1170,15 +1208,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out index 75a32d5..1f64f8e 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out @@ -64,9 +64,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6))))) usubq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-6 is a root stage Stage-5 depends on stages: Stage-6 @@ -84,114 +81,99 @@ STAGE PLANS: subq2:a TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - subq1-subquery1:usubq1-subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - subq1-subquery2:usubq1-subquery2:a TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -201,30 +183,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -287,9 +264,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-6 depends on stages: Stage-1 @@ -299,58 +273,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() + aggregations: count() bucketGroup: true - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -366,29 +325,25 @@ STAGE PLANS: subq2:a TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -396,41 +351,36 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out index 2a9eb54..8ac2c06 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out @@ -80,7 +80,36 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-2 is a root stage @@ -89,12 +118,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 116 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -102,32 +129,23 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 - Statistics: - numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: - numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -181,8 +199,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 1 dataSize: 114 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -322,27 +339,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -393,7 +403,36 @@ POSTHOOK: query: -- Since size is being used to find the big table, the order of explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-2 is a root stage @@ -402,12 +441,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 116 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -415,32 +452,23 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 - Statistics: - numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: - numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -494,8 +522,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 1 dataSize: 114 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -635,27 +662,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -700,7 +720,41 @@ POSTHOOK: query: -- The join is converted to a bucketed mapjoin with a mapjoin h explain extended select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -709,12 +763,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 116 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -722,33 +774,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 - Statistics: - numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -801,8 +844,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 1 dataSize: 114 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -906,27 +948,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out index 05d2236..d462218 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out @@ -108,7 +108,64 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_medium) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME bucket_big) c) (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME bucket_medium) d) (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_medium + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_TABREF + TOK_TABNAME + bucket_big + c + = + . + TOK_TABLE_OR_COL + c + key + . + TOK_TABLE_OR_COL + b + key + TOK_TABREF + TOK_TABNAME + bucket_medium + d + = + . + TOK_TABLE_OR_COL + c + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-2 is a root stage @@ -117,12 +174,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c - Statistics: - numRows: 116 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -132,46 +187,35 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) Position of Big Table: 2 - Statistics: - numRows: 255 dataSize: 25572 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [] - 1 [] + 0 + 1 Position of Big Table: 0 - Statistics: - numRows: 280 dataSize: 28129 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 280 Data size: 28129 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: - numRows: 280 dataSize: 28129 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 280 Data size: 28129 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -320,20 +364,17 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 1 dataSize: 114 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false b TableScan alias: b - Statistics: - numRows: 1 dataSize: 170 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE GatherStats: false d TableScan alias: d - Statistics: - numRows: 0 dataSize: 170 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 170 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -520,27 +561,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out index 58a3036..d0eb39c 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out @@ -70,9 +70,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) value2)))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key1)) (TOK_SELEXPR (TOK_TABLE_OR_COL key2)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value1)) (TOK_SELEXPR (TOK_TABLE_OR_COL value2))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -83,58 +80,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - subq:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Select Operator - expressions: - expr: _col0 - type: int - expr: _col2 - type: int + expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator - expressions: - expr: _col1 - type: string - expr: _col3 - type: string + expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 2 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -299,9 +276,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) value2)))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key1)) (TOK_SELEXPR (TOK_TABLE_OR_COL key2)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value1)) (TOK_SELEXPR (TOK_TABLE_OR_COL value2))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -312,58 +286,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - subq:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Select Operator - expressions: - expr: _col0 - type: int - expr: _col2 - type: int + expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator - expressions: - expr: _col1 - type: string - expr: _col3 - type: string + expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 2 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -544,9 +498,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) value2)))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key1)) (TOK_SELEXPR (TOK_TABLE_OR_COL key2)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value1)) (TOK_SELEXPR (TOK_TABLE_OR_COL value2))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -557,58 +508,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - subq:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Select Operator - expressions: - expr: _col0 - type: int - expr: _col2 - type: int + expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator - expressions: - expr: _col1 - type: string - expr: _col3 - type: string + expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 2 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_14.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_14.q.out index 8652df9..917b895 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_14.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_14.q.out @@ -42,9 +42,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-4 is a root stage , consists of Stage-3, Stage-1 Stage-3 has a backup stage: Stage-1 @@ -57,8 +54,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -67,24 +63,17 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 0 + 0 key (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -97,19 +86,14 @@ STAGE PLANS: alias: b Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -117,49 +101,37 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 0 + 0 key (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -212,9 +184,6 @@ POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-4 is a root stage , consists of Stage-3, Stage-1 Stage-3 has a backup stage: Stage-1 @@ -227,8 +196,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -237,24 +205,17 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 1 + 0 key (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -267,19 +228,14 @@ STAGE PLANS: alias: a Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -287,49 +243,37 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 189 Data size: 1891 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Right Outer Join0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 1 + 0 key (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_15.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_15.q.out index 199a5d9..53a9a7f 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_15.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_15.q.out @@ -40,9 +40,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-4 is a root stage , consists of Stage-3, Stage-1 Stage-3 has a backup stage: Stage-1 @@ -55,8 +52,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -65,24 +61,17 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 0 + 0 key (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -95,19 +84,14 @@ STAGE PLANS: alias: b Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -115,49 +99,37 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 0 + 0 key (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -177,9 +149,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-4 is a root stage , consists of Stage-3, Stage-1 Stage-3 has a backup stage: Stage-1 @@ -192,8 +161,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -202,24 +170,17 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 1 + 0 key (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -232,19 +193,14 @@ STAGE PLANS: alias: a Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -252,49 +208,37 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Right Outer Join0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 1 + 0 key (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out index 4dd6181..0488485 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out @@ -68,7 +68,36 @@ POSTHOOK: query: -- Since the leftmost table is assumed as the big table, arrang explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_big) a) (TOK_TABREF (TOK_TABNAME bucket_small) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -77,12 +106,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 54 dataSize: 5500 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -90,24 +117,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -205,15 +227,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -264,7 +282,36 @@ POSTHOOK: query: -- The mapjoin should fail resulting in the sort-merge join explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_big) a) (TOK_TABREF (TOK_TABNAME bucket_small) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-3, Stage-4, Stage-1 @@ -279,8 +326,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a GatherStats: false @@ -290,24 +336,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -502,15 +543,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -535,8 +572,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b GatherStats: false @@ -546,24 +582,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -801,15 +832,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -834,12 +861,10 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 54 dataSize: 5500 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -847,24 +872,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -962,15 +982,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out index f718548..1537f65 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out @@ -68,7 +68,36 @@ POSTHOOK: query: -- Since size is being used to find the big table, the order of explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -77,12 +106,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -90,24 +117,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -160,15 +182,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -217,7 +235,36 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_big) a) (TOK_TABREF (TOK_TABNAME bucket_small) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -226,12 +273,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -239,24 +284,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -309,15 +349,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -366,7 +402,36 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_big) a) (TOK_TABREF (TOK_TABNAME bucket_small) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-3, Stage-4, Stage-1 @@ -381,8 +446,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a GatherStats: false @@ -392,24 +456,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -644,15 +703,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -677,8 +732,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b GatherStats: false @@ -688,24 +742,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -900,15 +949,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -933,12 +978,10 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -946,24 +989,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1016,15 +1054,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out index 036323f..6dd49c4 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out @@ -80,7 +80,36 @@ POSTHOOK: query: -- Since size is being used to find the big table, the order of explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -89,12 +118,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 27 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -102,24 +129,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -172,15 +194,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -229,7 +247,36 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_big) a) (TOK_TABREF (TOK_TABNAME bucket_small) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -238,12 +285,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 27 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -251,24 +296,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -321,15 +361,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -378,7 +414,36 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_big) a) (TOK_TABREF (TOK_TABNAME bucket_small) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-3, Stage-4, Stage-1 @@ -393,8 +458,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a GatherStats: false @@ -404,24 +468,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -656,15 +715,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -689,8 +744,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b GatherStats: false @@ -700,24 +754,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -912,15 +961,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -945,12 +990,10 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 27 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -958,24 +1001,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1028,15 +1066,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out index 6b2e719..0f4f59f 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out @@ -53,7 +53,36 @@ POSTHOOK: query: -- Since size is being used to find the big table, the order of explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -62,12 +91,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 27 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -75,24 +102,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -147,15 +169,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -198,7 +216,36 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_big) a) (TOK_TABREF (TOK_TABNAME bucket_small) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -207,12 +254,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 27 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -220,24 +265,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -292,15 +332,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -343,7 +379,36 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_big) a) (TOK_TABREF (TOK_TABNAME bucket_small) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-3, Stage-4, Stage-1 @@ -358,8 +423,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a GatherStats: false @@ -369,24 +433,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -490,15 +549,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -523,8 +578,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b GatherStats: false @@ -534,24 +588,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -655,15 +704,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -688,12 +733,10 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 27 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -701,24 +744,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -773,15 +811,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out index 0d1257f..f137e4e 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out @@ -96,9 +96,6 @@ POSTHOOK: Lineage: tbl3.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME src) c) (= (. (TOK_TABLE_OR_COL c) value) (. (TOK_TABLE_OR_COL a) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -107,43 +104,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col1 - Position of Big Table: 0 Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - c - TableScan - alias: c - Reduce Output Operator - key expressions: - expr: value - type: string - sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: 1 + Map-reduce partition columns: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -151,17 +137,13 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -169,30 +151,21 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -241,9 +214,6 @@ POSTHOOK: Lineage: tbl3.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME src) d) (= (. (TOK_TABLE_OR_COL d) value) (. (TOK_TABLE_OR_COL a) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -252,43 +222,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col1 - Position of Big Table: 0 Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - d - TableScan - alias: d - Reduce Output Operator - key expressions: - expr: value - type: string - sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: 1 + Map-reduce partition columns: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -296,17 +255,13 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -314,30 +269,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -386,9 +332,6 @@ POSTHOOK: Lineage: tbl3.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME src) h) (= (. (TOK_TABLE_OR_COL h) value) (. (TOK_TABLE_OR_COL a) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-3 depends on stages: Stage-1 @@ -397,43 +340,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col1 - Position of Big Table: 0 Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - h + Map-reduce partition columns: _col1 (type: string) TableScan alias: h + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: value (type: string) sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: 1 + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -441,17 +373,13 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -459,30 +387,21 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -527,9 +446,6 @@ POSTHOOK: Lineage: tbl3.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME src) c) (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL a) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-8 is a root stage , consists of Stage-5, Stage-6, Stage-7, Stage-1 Stage-5 has a backup stage: Stage-1 @@ -545,8 +461,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -557,22 +472,17 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [GenericUDFBridge(Column[key])] - 1 [GenericUDFBridge(Column[key])] - 2 [GenericUDFBridge(Column[key])] - Position of Big Table: 0 + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(key) (type: double) + 2 UDFToDouble(key) (type: double) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -596,30 +506,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -627,8 +532,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -639,22 +543,17 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [GenericUDFBridge(Column[key])] - 1 [GenericUDFBridge(Column[key])] - 2 [GenericUDFBridge(Column[key])] - Position of Big Table: 1 + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(key) (type: double) + 2 UDFToDouble(key) (type: double) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -678,8 +577,7 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c Map Join Operator @@ -690,22 +588,17 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [GenericUDFBridge(Column[key])] - 1 [GenericUDFBridge(Column[key])] - 2 [GenericUDFBridge(Column[key])] - Position of Big Table: 2 + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(key) (type: double) + 2 UDFToDouble(key) (type: double) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -729,43 +622,31 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(key) - type: double + key expressions: UDFToDouble(key) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(key) - type: double - tag: 0 - b + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: c + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(key) - type: double + key expressions: UDFToDouble(key) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(key) - type: double - tag: 1 - c + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan - alias: c + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(key) - type: double + key expressions: UDFToDouble(key) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(key) - type: double - tag: 2 + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -775,17 +656,16 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -830,9 +710,6 @@ POSTHOOK: Lineage: tbl3.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME tbl3) c) (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL a) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -840,10 +717,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -852,40 +729,28 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - Position of Big Table: 0 + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -930,9 +795,6 @@ POSTHOOK: Lineage: tbl3.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME tbl4) c) (= (. (TOK_TABLE_OR_COL c) value) (. (TOK_TABLE_OR_COL a) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -941,43 +803,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col1 - Position of Big Table: 0 Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - c - TableScan - alias: c - Reduce Output Operator - key expressions: - expr: value - type: string - sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: 1 + Map-reduce partition columns: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -985,17 +836,13 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1003,30 +850,21 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1071,9 +909,6 @@ POSTHOOK: Lineage: tbl3.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME src) c) (= (. (TOK_TABLE_OR_COL c) value) (. (TOK_TABLE_OR_COL a) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -1082,43 +917,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col1 - Position of Big Table: 0 Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - c - TableScan - alias: c - Reduce Output Operator - key expressions: - expr: value - type: string - sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: 1 + Map-reduce partition columns: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1126,17 +950,13 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1144,30 +964,21 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1212,9 +1023,6 @@ POSTHOOK: Lineage: tbl3.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME src) c) (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL a) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-8 is a root stage , consists of Stage-5, Stage-6, Stage-7, Stage-1 Stage-5 has a backup stage: Stage-1 @@ -1230,8 +1038,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -1242,22 +1049,17 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [GenericUDFBridge(Column[key])] - 1 [GenericUDFBridge(Column[key])] - 2 [GenericUDFBridge(Column[key])] - Position of Big Table: 0 + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(key) (type: double) + 2 UDFToDouble(key) (type: double) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1281,30 +1083,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1312,8 +1109,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -1324,22 +1120,17 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [GenericUDFBridge(Column[key])] - 1 [GenericUDFBridge(Column[key])] - 2 [GenericUDFBridge(Column[key])] - Position of Big Table: 1 + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(key) (type: double) + 2 UDFToDouble(key) (type: double) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1363,8 +1154,7 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c Map Join Operator @@ -1375,22 +1165,17 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [GenericUDFBridge(Column[key])] - 1 [GenericUDFBridge(Column[key])] - 2 [GenericUDFBridge(Column[key])] - Position of Big Table: 2 + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(key) (type: double) + 2 UDFToDouble(key) (type: double) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1414,43 +1199,31 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(key) - type: double + key expressions: UDFToDouble(key) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(key) - type: double - tag: 0 - b + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: c + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(key) - type: double + key expressions: UDFToDouble(key) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(key) - type: double - tag: 1 - c + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan - alias: c + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(key) - type: double + key expressions: UDFToDouble(key) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(key) - type: double - tag: 2 + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1460,17 +1233,16 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1515,9 +1287,6 @@ POSTHOOK: Lineage: tbl3.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME tbl3) c) (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL a) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-7 is a root stage , consists of Stage-4, Stage-5, Stage-6, Stage-1 Stage-4 has a backup stage: Stage-1 @@ -1532,8 +1301,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -1544,25 +1312,18 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - Position of Big Table: 0 + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -1581,19 +1342,14 @@ STAGE PLANS: alias: c Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1601,8 +1357,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -1613,25 +1368,18 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - Position of Big Table: 1 + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -1650,19 +1398,14 @@ STAGE PLANS: alias: c Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1670,8 +1413,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c Map Join Operator @@ -1682,25 +1424,18 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - Position of Big Table: 2 + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -1719,19 +1454,14 @@ STAGE PLANS: alias: b Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1739,10 +1469,10 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -1751,40 +1481,28 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - Position of Big Table: 0 + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1829,9 +1547,6 @@ POSTHOOK: Lineage: tbl3.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME tbl4) c) (= (. (TOK_TABLE_OR_COL c) value) (. (TOK_TABLE_OR_COL a) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -1840,43 +1555,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col1 - Position of Big Table: 0 Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - c - TableScan - alias: c - Reduce Output Operator - key expressions: - expr: value - type: string - sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: 1 + Map-reduce partition columns: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1884,17 +1588,13 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1902,30 +1602,21 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out index d6c3dc6..b176c55 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out @@ -93,7 +93,36 @@ POSTHOOK: query: -- Since size is being used to find the big table, the order of explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -102,12 +131,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 54 dataSize: 5500 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -115,24 +142,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -230,15 +252,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -289,7 +307,36 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_big) a) (TOK_TABREF (TOK_TABNAME bucket_small) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -298,12 +345,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 54 dataSize: 5500 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -311,24 +356,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -426,15 +466,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -485,7 +521,36 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_big) a) (TOK_TABREF (TOK_TABNAME bucket_small) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-3, Stage-4, Stage-1 @@ -500,8 +565,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a GatherStats: false @@ -511,24 +575,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -808,15 +867,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -841,8 +896,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b GatherStats: false @@ -852,24 +906,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -1151,15 +1200,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -1184,12 +1229,10 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 54 dataSize: 5500 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -1197,24 +1240,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1312,15 +1350,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out index 153e223..5d2342c 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out @@ -93,7 +93,36 @@ POSTHOOK: query: -- Since size is being used to find the big table, the order of explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -102,12 +131,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 116 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -115,24 +142,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -230,15 +252,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -289,7 +307,36 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_big) a) (TOK_TABREF (TOK_TABNAME bucket_small) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -298,12 +345,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 116 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -311,24 +356,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -426,15 +466,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -487,7 +523,36 @@ POSTHOOK: query: -- The mapjoin should fail resulting in the sort-merge join explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_big) a) (TOK_TABREF (TOK_TABNAME bucket_small) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-3, Stage-4, Stage-1 @@ -502,8 +567,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a GatherStats: false @@ -513,24 +577,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -810,15 +869,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -843,8 +898,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b GatherStats: false @@ -854,24 +908,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -1153,15 +1202,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false @@ -1186,12 +1231,10 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 116 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -1199,24 +1242,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1314,15 +1352,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out index 7295490..3fc89ba 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out @@ -50,9 +50,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -60,49 +57,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 0 + 0 key (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -155,9 +140,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -166,68 +148,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -235,25 +193,16 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + value expressions: _col0 (type: int), _col1 (type: bigint) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -319,9 +268,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-3 depends on stages: Stage-1 @@ -330,68 +276,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -399,30 +323,21 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -505,9 +420,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) cnt1))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) cnt1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-9 depends on stages: Stage-1, Stage-5 , consists of Stage-7, Stage-8, Stage-3 @@ -521,68 +433,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -593,8 +481,7 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -602,24 +489,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -636,31 +514,16 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint + key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -668,8 +531,7 @@ STAGE PLANS: Stage: Stage-8 Map Reduce - Alias -> Map Operator Tree: - $INTNAME1 + Map Operator Tree: TableScan Map Join Operator condition map: @@ -677,24 +539,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -711,37 +564,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -749,20 +584,12 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -770,68 +597,44 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - src2:subq2:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -913,9 +716,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -923,58 +723,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1039,9 +825,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1049,58 +832,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1189,9 +958,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq4) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL subq4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1199,58 +965,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1329,9 +1081,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1339,58 +1088,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 8) - type: boolean + predicate: (key < 8) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1445,9 +1180,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) 1) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) 1) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -1464,73 +1196,65 @@ STAGE PLANS: subq2:a TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key + 1) - type: int + expressions: (key + 1) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key + 1) - type: int + expressions: (key + 1) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1581,9 +1305,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_TABREF (TOK_TABNAME tbl2) a) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL a) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1591,58 +1312,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1691,9 +1398,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1701,49 +1405,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 key (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1802,9 +1494,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq3) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1812,19 +1501,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -1833,40 +1520,28 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - 2 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1939,9 +1614,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) value2)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1949,58 +1621,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:subq2:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2061,9 +1719,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-3, Stage-4, Stage-1 Stage-3 has a backup stage: Stage-1 @@ -2077,8 +1732,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -2087,24 +1741,17 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 0 + 0 key (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -2117,19 +1764,14 @@ STAGE PLANS: alias: b Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2137,8 +1779,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - subq1:b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -2147,24 +1788,17 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 1 + 0 key (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -2177,19 +1811,14 @@ STAGE PLANS: alias: a Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2197,49 +1826,37 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 0 + 0 key (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2292,9 +1909,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-6 is a root stage , consists of Stage-4, Stage-5, Stage-1 Stage-4 has a backup stage: Stage-1 @@ -2309,8 +1923,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -2319,38 +1932,23 @@ STAGE PLANS: condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -2363,24 +1961,15 @@ STAGE PLANS: alias: b Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2388,25 +1977,16 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + value expressions: _col0 (type: int), _col1 (type: bigint) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2414,8 +1994,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - subq1:b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -2424,38 +2003,23 @@ STAGE PLANS: condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -2468,24 +2032,15 @@ STAGE PLANS: alias: a Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2493,68 +2048,44 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2620,9 +2151,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-6 is a root stage , consists of Stage-4, Stage-5, Stage-1 Stage-4 has a backup stage: Stage-1 @@ -2637,8 +2165,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -2647,38 +2174,23 @@ STAGE PLANS: condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -2691,24 +2203,17 @@ STAGE PLANS: alias: b Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2716,30 +2221,21 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2747,8 +2243,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -2757,38 +2252,23 @@ STAGE PLANS: condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -2801,24 +2281,17 @@ STAGE PLANS: alias: a Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2826,68 +2299,46 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2970,9 +2421,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) cnt1))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) cnt1))))) - STAGE DEPENDENCIES: Stage-12 is a root stage , consists of Stage-10, Stage-11, Stage-1 Stage-10 has a backup stage: Stage-1 @@ -2995,8 +2443,7 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: - src1:subq1:a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -3005,38 +2452,23 @@ STAGE PLANS: condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -3049,24 +2481,15 @@ STAGE PLANS: alias: b Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3077,8 +2500,7 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -3086,24 +2508,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3120,31 +2533,16 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint + key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3152,8 +2550,7 @@ STAGE PLANS: Stage: Stage-8 Map Reduce - Alias -> Map Operator Tree: - $INTNAME1 + Map Operator Tree: TableScan Map Join Operator condition map: @@ -3161,24 +2558,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3195,37 +2583,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -3233,20 +2603,12 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3254,8 +2616,7 @@ STAGE PLANS: Stage: Stage-11 Map Reduce - Alias -> Map Operator Tree: - src1:subq1:b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -3264,38 +2625,23 @@ STAGE PLANS: condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -3308,24 +2654,15 @@ STAGE PLANS: alias: a Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3333,68 +2670,44 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3405,8 +2718,7 @@ STAGE PLANS: Stage: Stage-13 Map Reduce - Alias -> Map Operator Tree: - src2:subq2:a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -3415,38 +2727,23 @@ STAGE PLANS: condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -3459,24 +2756,15 @@ STAGE PLANS: alias: b Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3484,8 +2772,7 @@ STAGE PLANS: Stage: Stage-14 Map Reduce - Alias -> Map Operator Tree: - src2:subq2:b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -3494,38 +2781,23 @@ STAGE PLANS: condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -3538,24 +2810,15 @@ STAGE PLANS: alias: a Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3563,68 +2826,44 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - src2:subq2:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3706,9 +2945,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 @@ -3733,38 +2969,27 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 HashTable Sink Operator condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 Map Join Operator condition map: @@ -3772,41 +2997,29 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3823,38 +3036,27 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 HashTable Sink Operator condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 _col0 (type: int) Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - subq2:a + Map Operator Tree: TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 Map Join Operator condition map: @@ -3862,41 +3064,29 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3904,58 +3094,44 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4020,9 +3196,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-3, Stage-7, Stage-1 Stage-3 has a backup stage: Stage-1 @@ -4037,18 +3210,13 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:a + Map Operator Tree: TableScan alias: a Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 Map Join Operator condition map: @@ -4056,24 +3224,17 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -4086,19 +3247,14 @@ STAGE PLANS: alias: b Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4115,28 +3271,21 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 HashTable Sink Operator condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 key (type: int) Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -4145,41 +3294,29 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4187,58 +3324,44 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4327,9 +3450,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq4) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL subq4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 @@ -4354,38 +3474,27 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 HashTable Sink Operator condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:a + Map Operator Tree: TableScan alias: a Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 Map Join Operator condition map: @@ -4393,41 +3502,29 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4444,38 +3541,27 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 HashTable Sink Operator condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 _col0 (type: int) Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - subq4:subq3:a + Map Operator Tree: TableScan alias: a Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 Map Join Operator condition map: @@ -4483,41 +3569,29 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4525,58 +3599,44 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4655,9 +3715,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 @@ -4682,38 +3739,27 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: - expr: (key < 8) - type: boolean + predicate: (key < 8) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 HashTable Sink Operator condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a Filter Operator - predicate: - expr: (key < 8) - type: boolean + predicate: (key < 8) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 Map Join Operator condition map: @@ -4721,41 +3767,29 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4772,38 +3806,27 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: - expr: (key < 8) - type: boolean + predicate: (key < 8) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 HashTable Sink Operator condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 _col0 (type: int) Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - subq2:a + Map Operator Tree: TableScan alias: a Filter Operator - predicate: - expr: (key < 8) - type: boolean + predicate: (key < 8) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 Map Join Operator condition map: @@ -4811,41 +3834,29 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4853,58 +3864,44 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 8) - type: boolean + predicate: (key < 8) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4955,9 +3952,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_TABREF (TOK_TABNAME tbl2) a) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL a) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-3, Stage-7, Stage-1 Stage-3 has a backup stage: Stage-1 @@ -4972,18 +3966,13 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 Map Join Operator condition map: @@ -4991,24 +3980,17 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -5021,19 +4003,14 @@ STAGE PLANS: alias: a Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5050,28 +4027,21 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 HashTable Sink Operator condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 key (type: int) Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -5080,41 +4050,29 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5122,58 +4080,44 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5222,9 +4166,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-4, Stage-1 Stage-6 has a backup stage: Stage-1 @@ -5248,28 +4189,21 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 HashTable Sink Operator condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 key (type: int) + 1 _col0 (type: int) Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -5278,41 +4212,29 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 key (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5320,18 +4242,13 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 Map Join Operator condition map: @@ -5339,24 +4256,17 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[_col0]] - Position of Big Table: 1 + 0 key (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -5369,19 +4279,14 @@ STAGE PLANS: alias: a Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5389,49 +4294,37 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 key (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5490,9 +4383,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq3) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-7 is a root stage , consists of Stage-8, Stage-9, Stage-10, Stage-1 Stage-8 has a backup stage: Stage-1 @@ -5522,63 +4412,46 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 HashTable Sink Operator condition expressions: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - 2 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) subq3:a TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 HashTable Sink Operator condition expressions: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - 2 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 Map Join Operator condition map: @@ -5588,42 +4461,30 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - 2 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5643,63 +4504,46 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 HashTable Sink Operator condition expressions: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - 2 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) subq3:a TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 HashTable Sink Operator condition expressions: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - 2 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - subq2:a + Map Operator Tree: TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 Map Join Operator condition map: @@ -5709,42 +4553,30 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - 2 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5764,63 +4596,46 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 HashTable Sink Operator condition expressions: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - 2 [Column[_col0]] - Position of Big Table: 2 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) subq2:a TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 HashTable Sink Operator condition expressions: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - 2 [Column[_col0]] - Position of Big Table: 2 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - subq3:a + Map Operator Tree: TableScan alias: a Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 Map Join Operator condition map: @@ -5830,42 +4645,30 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - 2 [Column[_col0]] - Position of Big Table: 2 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5873,19 +4676,17 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -5894,40 +4695,28 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - 2 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -6000,9 +4789,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) value2)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-3, Stage-7, Stage-1 Stage-3 has a backup stage: Stage-1 @@ -6017,18 +4803,13 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - a:subq2:subq1:a + Map Operator Tree: TableScan alias: a Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 Map Join Operator condition map: @@ -6036,24 +4817,17 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -6066,19 +4840,14 @@ STAGE PLANS: alias: b Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -6095,28 +4864,21 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 HashTable Sink Operator condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 key (type: int) Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a:b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -6125,41 +4887,29 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -6167,58 +4917,44 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:subq2:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/binary_output_format.q.out ql/src/test/results/clientpositive/binary_output_format.q.out index a0ff660..bcfb8eb 100644 --- ql/src/test/results/clientpositive/binary_output_format.q.out +++ ql/src/test/results/clientpositive/binary_output_format.q.out @@ -50,7 +50,40 @@ SELECT TRANSFORM(*) FROM src POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST TOK_ALLCOLREF) TOK_SERDE TOK_RECORDWRITER 'cat' (TOK_SERDE (TOK_SERDENAME 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' (TOK_TABLEPROPERTIES (TOK_TABLEPROPLIST (TOK_TABLEPROPERTY 'serialization.last.column.takes.rest' 'true'))))) (TOK_RECORDREADER 'org.apache.hadoop.hive.ql.exec.BinaryRecordReader') (TOK_TABCOLLIST (TOK_TABCOL mydata TOK_STRING))))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest1 + TOK_SELECT + TOK_SELEXPR + TOK_TRANSFORM + TOK_EXPLIST + TOK_ALLCOLREF + TOK_SERDE + TOK_RECORDWRITER + 'cat' + TOK_SERDE + TOK_SERDENAME + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + TOK_TABLEPROPERTIES + TOK_TABLEPROPLIST + TOK_TABLEPROPERTY + 'serialization.last.column.takes.rest' + 'true' + TOK_RECORDREADER + 'org.apache.hadoop.hive.ql.exec.BinaryRecordReader' + TOK_TABCOLLIST + TOK_TABCOL + mydata + TOK_STRING + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -65,22 +98,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: @@ -94,15 +120,13 @@ STAGE PLANS: serialization.last.column.takes.rest true serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -208,8 +232,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -278,8 +301,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/bucket1.q.out ql/src/test/results/clientpositive/bucket1.q.out index 4d5804a..5ade5f8 100644 --- ql/src/test/results/clientpositive/bucket1.q.out +++ ql/src/test/results/clientpositive/bucket1.q.out @@ -12,7 +12,21 @@ insert overwrite table bucket1_1 select * from src POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucket1_1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucket1_1 + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -22,35 +36,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: UDFToInteger(_col0) - type: int - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + value expressions: _col0 (type: string), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -101,24 +101,17 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/bucket2.q.out ql/src/test/results/clientpositive/bucket2.q.out index b6ab31e..672903d 100644 --- ql/src/test/results/clientpositive/bucket2.q.out +++ ql/src/test/results/clientpositive/bucket2.q.out @@ -12,7 +12,21 @@ insert overwrite table bucket2_1 select * from src POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucket2_1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucket2_1 + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -22,35 +36,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: UDFToInteger(_col0) - type: int - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + value expressions: _col0 (type: string), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -101,24 +101,17 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 2 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -186,9 +179,6 @@ select * from bucket2_1 tablesample (bucket 1 out of 2) s order by key POSTHOOK: type: QUERY POSTHOOK: Lineage: bucket2_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bucket2_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME bucket2_1) (TOK_TABLEBUCKETSAMPLE 1 2) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -196,37 +186,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((hash(key) & 2147483647) % 2) = 0) - type: boolean + predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/bucket3.q.out ql/src/test/results/clientpositive/bucket3.q.out index 7d4a922..9232f6b 100644 --- ql/src/test/results/clientpositive/bucket3.q.out +++ ql/src/test/results/clientpositive/bucket3.q.out @@ -12,7 +12,25 @@ insert overwrite table bucket3_1 partition (ds='1') select * from src POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucket3_1) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucket3_1 + TOK_PARTSPEC + TOK_PARTVAL + ds + '1' + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -22,35 +40,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: UDFToInteger(_col0) - type: int - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + value expressions: _col0 (type: string), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -101,25 +105,18 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 2 Static Partition Specification: ds=1/ - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -207,9 +204,6 @@ POSTHOOK: Lineage: bucket3_1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchem POSTHOOK: Lineage: bucket3_1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: bucket3_1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bucket3_1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME bucket3_1) (TOK_TABLEBUCKETSAMPLE 1 2) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -217,41 +211,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((hash(key) & 2147483647) % 2) = 0) - type: boolean + predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/bucket4.q.out ql/src/test/results/clientpositive/bucket4.q.out index 9d9d9c8..fb2f619 100644 --- ql/src/test/results/clientpositive/bucket4.q.out +++ ql/src/test/results/clientpositive/bucket4.q.out @@ -12,7 +12,21 @@ insert overwrite table bucket4_1 select * from src POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucket4_1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucket4_1 + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -22,38 +36,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToInteger(_col0) - type: int + key expressions: UDFToInteger(_col0) (type: int) sort order: + - Map-reduce partition columns: - expr: UDFToInteger(_col0) - type: int - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + value expressions: _col0 (type: string), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -104,24 +102,17 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 2 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -191,9 +182,6 @@ select * from bucket4_1 tablesample (bucket 1 out of 2) s POSTHOOK: type: QUERY POSTHOOK: Lineage: bucket4_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bucket4_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME bucket4_1) (TOK_TABLEBUCKETSAMPLE 1 2) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -201,24 +189,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((hash(key) & 2147483647) % 2) = 0) - type: boolean + predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/bucket5.q.out ql/src/test/results/clientpositive/bucket5.q.out index d5c9e4e..8a49352 100644 --- ql/src/test/results/clientpositive/bucket5.q.out +++ ql/src/test/results/clientpositive/bucket5.q.out @@ -25,7 +25,40 @@ INSERT OVERWRITE TABLE bucketed_table SELECT key, value INSERT OVERWRITE TABLE unbucketed_table SELECT key, value cluster by key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketed_table))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME unbucketed_table))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketed_table + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + unbucketed_table + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_CLUSTERBY + TOK_TABLE_OR_COL + key + STAGE DEPENDENCIES: Stage-2 is a root stage @@ -43,47 +76,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToInteger(_col0) - type: int + key expressions: UDFToInteger(_col0) (type: int) sort order: + - Map-reduce partition columns: - expr: UDFToInteger(_col0) - type: int - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + value expressions: _col0 (type: string), _col1 (type: string) Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -151,24 +163,17 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -220,26 +225,16 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + value expressions: _col0 (type: string), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -268,24 +263,17 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 2 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -342,8 +330,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -409,8 +396,7 @@ STAGE PLANS: Stage: Stage-8 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/bucket_groupby.q.out ql/src/test/results/clientpositive/bucket_groupby.q.out index 5cbee12..2371bad 100644 --- ql/src/test/results/clientpositive/bucket_groupby.q.out +++ ql/src/test/results/clientpositive/bucket_groupby.q.out @@ -43,9 +43,6 @@ select key, count(1) from clustergroupby where ds='100' group by key limit 10 POSTHOOK: type: QUERY POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '100')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -53,57 +50,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - clustergroupby + Map Operator Tree: TableScan alias: clustergroupby + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -175,9 +158,6 @@ POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSc POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '101')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -185,57 +165,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - clustergroupby + Map Operator Tree: TableScan alias: clustergroupby + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -281,9 +247,6 @@ POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSc POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION length (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '101')) (TOK_GROUPBY (TOK_FUNCTION length (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -291,57 +254,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - clustergroupby + Map Operator Tree: TableScan alias: clustergroupby + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: length(key) - type: int + aggregations: count(1) + keys: length(key) (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -378,9 +327,6 @@ POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSc POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION abs (TOK_FUNCTION length (TOK_TABLE_OR_COL key)))) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '101')) (TOK_GROUPBY (TOK_FUNCTION abs (TOK_FUNCTION length (TOK_TABLE_OR_COL key)))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -388,57 +334,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - clustergroupby + Map Operator Tree: TableScan alias: clustergroupby + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: abs(length(key)) - type: int + aggregations: count(1) + keys: abs(length(key)) (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -477,9 +409,6 @@ POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSc POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '101')) (TOK_GROUPBY (TOK_TABLE_OR_COL key) 3) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -487,65 +416,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - clustergroupby + Map Operator Tree: TableScan alias: clustergroupby + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: 3 - type: int + aggregations: count(1) + keys: key (type: string), 3 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: int + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -591,9 +498,6 @@ POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSc POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value) key) (TOK_SELEXPR (TOK_TABLE_OR_COL key) value)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '101')))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -601,57 +505,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq:clustergroupby + Map Operator Tree: TableScan alias: clustergroupby + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -695,9 +585,6 @@ POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSc POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -705,56 +592,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - clustergroupby + Map Operator Tree: TableScan alias: clustergroupby + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1099,9 +970,6 @@ POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSc POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) 3))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1109,64 +977,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - clustergroupby + Map Operator Tree: TableScan alias: clustergroupby + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: 3 - type: int + aggregations: count(1) + keys: key (type: string), 3 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: int + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1257,9 +1101,6 @@ POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSc POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '102')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1267,57 +1108,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - clustergroupby + Map Operator Tree: TableScan alias: clustergroupby + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) + aggregations: count(1) bucketGroup: true - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1365,9 +1193,6 @@ POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSc POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '102')) (TOK_GROUPBY (TOK_TABLE_OR_COL value)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1375,57 +1200,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - clustergroupby + Map Operator Tree: TableScan alias: clustergroupby + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: value - type: string + aggregations: count(1) + keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1473,9 +1284,6 @@ POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSc POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '102')) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1483,67 +1291,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - clustergroupby + Map Operator Tree: TableScan alias: clustergroupby + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) + aggregations: count(1) bucketGroup: true - keys: - expr: key - type: string - expr: value - type: string + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1670,9 +1455,6 @@ POSTHOOK: Lineage: clustergroupby PARTITION(ds=102).key SIMPLE [(src)src.FieldSc POSTHOOK: Lineage: clustergroupby PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=103).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=103).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '103')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1680,57 +1462,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - clustergroupby + Map Operator Tree: TableScan alias: clustergroupby + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) + aggregations: count(1) bucketGroup: true - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1782,9 +1551,6 @@ POSTHOOK: Lineage: clustergroupby PARTITION(ds=102).key SIMPLE [(src)src.FieldSc POSTHOOK: Lineage: clustergroupby PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=103).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=103).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '103')) (TOK_GROUPBY (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1792,67 +1558,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - clustergroupby + Map Operator Tree: TableScan alias: clustergroupby + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: value - type: string - expr: key - type: string + aggregations: count(1) + keys: value (type: string), key (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/bucket_map_join_1.q.out ql/src/test/results/clientpositive/bucket_map_join_1.q.out index 7e945d0..75bcda8 100644 --- ql/src/test/results/clientpositive/bucket_map_join_1.q.out +++ ql/src/test/results/clientpositive/bucket_map_join_1.q.out @@ -47,7 +47,51 @@ explain extended select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table1) a) (TOK_TABREF (TOK_TABNAME table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + table1 + a + TOK_TABREF + TOK_TABNAME + table2 + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + value + . + TOK_TABLE_OR_COL + b + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -56,12 +100,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 0 dataSize: 20 basicStatsState: PARTIAL colStatsState: NONE + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -69,32 +111,23 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key], Column[value]] - 1 [Column[key], Column[value]] + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) Position of Big Table: 0 - Statistics: - numRows: 0 dataSize: 22 basicStatsState: PARTIAL colStatsState: NONE + Statistics: Num rows: 0 Data size: 22 Basic stats: PARTIAL Column stats: NONE Select Operator - Statistics: - numRows: 0 dataSize: 22 basicStatsState: PARTIAL colStatsState: NONE + Statistics: Num rows: 0 Data size: 22 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -105,8 +138,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 0 dataSize: 21 basicStatsState: PARTIAL colStatsState: NONE + Statistics: Num rows: 0 Data size: 21 Basic stats: PARTIAL Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -162,27 +194,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/bucket_map_join_2.q.out ql/src/test/results/clientpositive/bucket_map_join_2.q.out index 9420338..a737f82 100644 --- ql/src/test/results/clientpositive/bucket_map_join_2.q.out +++ ql/src/test/results/clientpositive/bucket_map_join_2.q.out @@ -47,7 +47,51 @@ explain extended select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table1) a) (TOK_TABREF (TOK_TABNAME table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + table1 + a + TOK_TABREF + TOK_TABNAME + table2 + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + value + . + TOK_TABLE_OR_COL + b + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -56,12 +100,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 0 dataSize: 20 basicStatsState: PARTIAL colStatsState: NONE + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -69,32 +111,23 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key], Column[value]] - 1 [Column[key], Column[value]] + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) Position of Big Table: 0 - Statistics: - numRows: 0 dataSize: 22 basicStatsState: PARTIAL colStatsState: NONE + Statistics: Num rows: 0 Data size: 22 Basic stats: PARTIAL Column stats: NONE Select Operator - Statistics: - numRows: 0 dataSize: 22 basicStatsState: PARTIAL colStatsState: NONE + Statistics: Num rows: 0 Data size: 22 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -105,8 +138,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 0 dataSize: 21 basicStatsState: PARTIAL colStatsState: NONE + Statistics: Num rows: 0 Data size: 21 Basic stats: PARTIAL Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -162,27 +194,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/bucketcontext_1.q.out ql/src/test/results/clientpositive/bucketcontext_1.q.out index 6d5fd24..930be79 100644 --- ql/src/test/results/clientpositive/bucketcontext_1.q.out +++ ql/src/test/results/clientpositive/bucketcontext_1.q.out @@ -78,7 +78,41 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -87,12 +121,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 116 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -100,33 +132,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 - Statistics: - numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -180,8 +203,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 27 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -287,27 +309,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -350,7 +365,41 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -359,12 +408,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 116 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -372,24 +419,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -487,15 +529,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/bucketcontext_2.q.out ql/src/test/results/clientpositive/bucketcontext_2.q.out index 82d0d7d..88f747a 100644 --- ql/src/test/results/clientpositive/bucketcontext_2.q.out +++ ql/src/test/results/clientpositive/bucketcontext_2.q.out @@ -66,7 +66,41 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -75,12 +109,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 54 dataSize: 5500 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -88,33 +120,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -168,8 +191,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -275,27 +297,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -338,7 +353,41 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -347,12 +396,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 54 dataSize: 5500 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -360,24 +407,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -475,15 +517,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/bucketcontext_3.q.out ql/src/test/results/clientpositive/bucketcontext_3.q.out index 4e8fb6e..3da1cc9 100644 --- ql/src/test/results/clientpositive/bucketcontext_3.q.out +++ ql/src/test/results/clientpositive/bucketcontext_3.q.out @@ -66,7 +66,41 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -75,12 +109,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -88,33 +120,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -210,8 +233,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 54 dataSize: 5500 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -272,27 +294,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -335,7 +350,41 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -344,12 +393,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -357,24 +404,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -427,15 +469,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/bucketcontext_4.q.out ql/src/test/results/clientpositive/bucketcontext_4.q.out index d25a50a..33dee62 100644 --- ql/src/test/results/clientpositive/bucketcontext_4.q.out +++ ql/src/test/results/clientpositive/bucketcontext_4.q.out @@ -78,7 +78,41 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -87,12 +121,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 27 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -100,33 +132,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 - Statistics: - numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -222,8 +245,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 116 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -284,27 +306,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -347,7 +362,41 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -356,12 +405,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 27 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -369,24 +416,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -439,15 +481,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/bucketcontext_5.q.out ql/src/test/results/clientpositive/bucketcontext_5.q.out index cabef60..eb751f3 100644 --- ql/src/test/results/clientpositive/bucketcontext_5.q.out +++ ql/src/test/results/clientpositive/bucketcontext_5.q.out @@ -51,7 +51,41 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -60,12 +94,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 27 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -73,33 +105,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -110,8 +133,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -174,27 +196,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -231,7 +246,41 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -240,12 +289,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 27 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -253,24 +300,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -325,15 +367,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/bucketcontext_6.q.out ql/src/test/results/clientpositive/bucketcontext_6.q.out index 12a0151..320b8b9 100644 --- ql/src/test/results/clientpositive/bucketcontext_6.q.out +++ ql/src/test/results/clientpositive/bucketcontext_6.q.out @@ -65,7 +65,41 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -74,12 +108,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 54 dataSize: 5500 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -87,33 +119,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -124,8 +147,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -231,27 +253,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -292,7 +307,41 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -301,12 +350,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 54 dataSize: 5500 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -314,24 +361,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -429,15 +471,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/bucketcontext_7.q.out ql/src/test/results/clientpositive/bucketcontext_7.q.out index eeec5ed..ef4f295 100644 --- ql/src/test/results/clientpositive/bucketcontext_7.q.out +++ ql/src/test/results/clientpositive/bucketcontext_7.q.out @@ -91,7 +91,41 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -100,12 +134,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 54 dataSize: 5500 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -113,33 +145,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 - Statistics: - numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -235,8 +258,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 116 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -342,27 +364,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -407,7 +422,41 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -416,12 +465,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 54 dataSize: 5500 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -429,24 +476,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -544,15 +586,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/bucketcontext_8.q.out ql/src/test/results/clientpositive/bucketcontext_8.q.out index 913d6e6..f9e6835 100644 --- ql/src/test/results/clientpositive/bucketcontext_8.q.out +++ ql/src/test/results/clientpositive/bucketcontext_8.q.out @@ -91,7 +91,41 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -100,12 +134,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 116 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -113,33 +145,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 - Statistics: - numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -235,8 +258,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 54 dataSize: 5500 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -342,27 +364,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -407,7 +422,41 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -416,12 +465,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 116 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -429,24 +476,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) Position of Big Table: 1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -544,15 +586,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out index 0fccd5b..e734864 100644 --- ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out +++ ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out @@ -28,9 +28,6 @@ SELECT tmp1.name as name FROM ( JOIN (SELECT 'MMM' AS n FROM T1) tmp3 ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL name)) (TOK_SELEXPR 'MMM' n)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'MMM' n)))) tmp2)) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'MMM' n)))) tmp3) (AND (= (. (TOK_TABLE_OR_COL tmp1) n) (. (TOK_TABLE_OR_COL tmp2) n)) (= (. (TOK_TABLE_OR_COL tmp1) n) (. (TOK_TABLE_OR_COL tmp3) n))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) name) name)))) ttt)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME T2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 5000000))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -40,63 +37,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - ttt:tmp1:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: name - type: string - expr: 'MMM' - type: string - outputColumnNames: _col0, _col1 + expressions: 'MMM' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - ttt:tmp2:t1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE TableScan alias: t1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: 'MMM' - type: string + expressions: 'MMM' (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - ttt:tmp3:t1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE TableScan alias: t1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: 'MMM' - type: string - outputColumnNames: _col0 + expressions: name (type: string), 'MMM' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 2 + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -106,17 +84,17 @@ STAGE PLANS: 0 {VALUE._col0} 1 2 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 5000000 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -124,21 +102,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 5000000 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -182,9 +160,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT COUNT(1) FROM T2 POSTHOOK: type: QUERY POSTHOOK: Lineage: t2.name SIMPLE [(t1)t1.FieldSchema(name:name, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -192,38 +167,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t2 + Map Operator Tree: TableScan alias: t2 + Statistics: Num rows: 0 Data size: 80294704 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 80294704 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -268,9 +239,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT COUNT(1) FROM T3 POSTHOOK: type: QUERY POSTHOOK: Lineage: t2.name SIMPLE [(t1)t1.FieldSchema(name:name, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -278,38 +246,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t3 + Map Operator Tree: TableScan alias: t3 + Statistics: Num rows: 0 Data size: 11603 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 11603 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/bucketmapjoin1.q.out ql/src/test/results/clientpositive/bucketmapjoin1.q.out index a4f480c..81ca8a7 100644 --- ql/src/test/results/clientpositive/bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin1.q.out @@ -26,7 +26,60 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key where b.ds="2008-04-08" POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + STAGE DEPENDENCIES: Stage-3 is a root stage @@ -44,24 +97,19 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (ds = '2008-04-08') - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + predicate: (ds = '2008-04-08') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE HashTable Sink Operator condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) Position of Big Table: 0 Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -71,61 +119,6 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - GatherStats: false - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 0 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - BucketMapJoin: true - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col6 - type: string - outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types int:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Local Work: Map Reduce Local Work @@ -158,7 +151,60 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key where b.ds="2008-04-08" POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -167,68 +213,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b - TableScan - alias: b - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (ds = '2008-04-08') - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - BucketMapJoin: true - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col6 - type: string - outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types int:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -239,8 +223,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -344,7 +327,61 @@ from srcbucket_mapjoin a join srcbucket_mapjoin_part b on a.key=b.key where b.ds="2008-04-08" POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -359,12 +396,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 26 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -372,33 +407,23 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col5 Position of Big Table: 0 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -470,8 +495,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 55 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -566,8 +590,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -633,8 +656,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -872,7 +894,61 @@ POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a. POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -887,12 +963,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 55 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -900,33 +974,23 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col5 Position of Big Table: 1 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -961,8 +1025,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 26 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -1061,8 +1124,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -1143,8 +1205,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/bucketmapjoin10.q.out ql/src/test/results/clientpositive/bucketmapjoin10.q.out index 561465f..60c66ea 100644 --- ql/src/test/results/clientpositive/bucketmapjoin10.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin10.q.out @@ -115,7 +115,55 @@ FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + a + part + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + b + part + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -124,12 +172,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 1737 dataSize: 6950 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1737 Data size: 6950 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -137,32 +183,23 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) Position of Big Table: 0 - Statistics: - numRows: 1910 dataSize: 7645 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1910 Data size: 7645 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: - numRows: 1910 dataSize: 7645 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1910 Data size: 7645 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -256,8 +293,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 1737 dataSize: 6950 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1737 Data size: 6950 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -354,27 +390,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/bucketmapjoin11.q.out ql/src/test/results/clientpositive/bucketmapjoin11.q.out index 51c3927..2cc2bd4 100644 --- ql/src/test/results/clientpositive/bucketmapjoin11.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin11.q.out @@ -121,7 +121,55 @@ FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + a + part + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + b + part + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -130,12 +178,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 2140 dataSize: 8562 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2140 Data size: 8562 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -143,33 +189,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) Position of Big Table: 0 - Statistics: - numRows: 2354 dataSize: 9418 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2354 Data size: 9418 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 2354 dataSize: 9418 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2354 Data size: 9418 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -263,8 +300,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 2140 dataSize: 8562 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2140 Data size: 8562 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -368,27 +404,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -443,7 +472,65 @@ FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = b.part AND a.part IS NOT NULL AND b.part IS NOT NULL POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) (. (TOK_TABLE_OR_COL b) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + AND + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + . + TOK_TABLE_OR_COL + b + part + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + a + part + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + b + part + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -452,12 +539,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 2140 dataSize: 8562 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2140 Data size: 8562 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -465,33 +550,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key], Column[part]] - 1 [Column[key], Column[part]] + 0 key (type: int), part (type: string) + 1 key (type: int), part (type: string) Position of Big Table: 0 - Statistics: - numRows: 2354 dataSize: 9418 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2354 Data size: 9418 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 2354 dataSize: 9418 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2354 Data size: 9418 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -585,8 +661,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 2140 dataSize: 8562 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2140 Data size: 8562 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -690,27 +765,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/bucketmapjoin12.q.out ql/src/test/results/clientpositive/bucketmapjoin12.q.out index 2c48acd..2da135e 100644 --- ql/src/test/results/clientpositive/bucketmapjoin12.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin12.q.out @@ -89,7 +89,55 @@ FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' and b.part = '1' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + and + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '1' + = + . + TOK_TABLE_OR_COL + b + part + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -98,12 +146,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 687 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -111,33 +157,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) Position of Big Table: 0 - Statistics: - numRows: 755 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 755 Data size: 3025 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 755 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 755 Data size: 3025 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -189,8 +226,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 687 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -250,27 +286,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -325,7 +354,55 @@ FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_3 b ON a.key = b.key AND a.part = '1' and b.part = '1' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_3) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_3 + b + and + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '1' + = + . + TOK_TABLE_OR_COL + b + part + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -334,12 +411,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 687 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -347,32 +422,23 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) Position of Big Table: 0 - Statistics: - numRows: 755 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 755 Data size: 3025 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: - numRows: 755 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 755 Data size: 3025 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -424,8 +490,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 687 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -478,27 +543,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/bucketmapjoin13.q.out ql/src/test/results/clientpositive/bucketmapjoin13.q.out index 5be19d0..e6af5a9 100644 --- ql/src/test/results/clientpositive/bucketmapjoin13.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin13.q.out @@ -95,7 +95,41 @@ POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -104,12 +138,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 1000 dataSize: 10624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -117,32 +149,23 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) Position of Big Table: 0 - Statistics: - numRows: 1100 dataSize: 11686 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: - numRows: 1100 dataSize: 11686 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -195,8 +218,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -293,27 +315,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -382,7 +397,48 @@ POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '2')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '2' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -391,12 +447,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -404,33 +458,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) Position of Big Table: 0 - Statistics: - numRows: 550 dataSize: 5843 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 550 dataSize: 5843 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -483,8 +528,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -544,27 +588,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -645,7 +682,41 @@ POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -654,12 +725,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -667,33 +736,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) Position of Big Table: 0 - Statistics: - numRows: 550 dataSize: 5843 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 550 dataSize: 5843 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -746,8 +806,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -807,27 +866,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -910,7 +962,41 @@ POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -919,12 +1005,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -932,33 +1016,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) Position of Big Table: 0 - Statistics: - numRows: 550 dataSize: 5843 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 550 dataSize: 5843 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -1011,8 +1086,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -1072,27 +1146,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/bucketmapjoin2.q.out ql/src/test/results/clientpositive/bucketmapjoin2.q.out index 61bc7e7..7d6dd86 100644 --- ql/src/test/results/clientpositive/bucketmapjoin2.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin2.q.out @@ -74,7 +74,61 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -89,12 +143,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 55 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -102,33 +154,23 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 Position of Big Table: 0 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col6 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -200,8 +242,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 29 dataSize: 3062 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -295,8 +336,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -362,8 +402,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -605,7 +644,61 @@ POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_pa POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -620,12 +713,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 29 dataSize: 3062 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -633,33 +724,23 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 Position of Big Table: 1 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col6 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -736,8 +817,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 55 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -836,8 +916,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -918,8 +997,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -1335,7 +1413,54 @@ POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_pa POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1350,12 +1475,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 55 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -1363,33 +1486,23 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 Position of Big Table: 0 - Statistics: - numRows: 63 dataSize: 6736 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col6 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 63 dataSize: 6736 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 63 dataSize: 6736 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1507,8 +1620,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 58 dataSize: 6124 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -1607,8 +1719,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -1689,8 +1800,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/bucketmapjoin3.q.out ql/src/test/results/clientpositive/bucketmapjoin3.q.out index 630a545..f62ce1d 100644 --- ql/src/test/results/clientpositive/bucketmapjoin3.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin3.q.out @@ -91,7 +91,68 @@ from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) b) (and (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")) (= (. (TOK_TABLE_OR_COL a) ds) "2008-04-08")))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + b + and + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + = + . + TOK_TABLE_OR_COL + a + ds + "2008-04-08" + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -106,12 +167,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 29 dataSize: 3062 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -119,33 +178,23 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 Position of Big Table: 0 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col6 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -217,8 +266,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 55 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -312,8 +360,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -379,8 +426,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -622,7 +668,68 @@ POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_pa POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) b) (and (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")) (= (. (TOK_TABLE_OR_COL a) ds) "2008-04-08")))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + b + and + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + = + . + TOK_TABLE_OR_COL + a + ds + "2008-04-08" + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -637,12 +744,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 55 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -650,33 +755,23 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 Position of Big Table: 1 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col6 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -753,8 +848,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 29 dataSize: 3062 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -853,8 +947,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -935,8 +1028,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/bucketmapjoin4.q.out ql/src/test/results/clientpositive/bucketmapjoin4.q.out index 76b24b2..ef980f5 100644 --- ql/src/test/results/clientpositive/bucketmapjoin4.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin4.q.out @@ -91,7 +91,54 @@ from srcbucket_mapjoin a join srcbucket_mapjoin b on a.key=b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -106,12 +153,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 26 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -119,33 +164,23 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col5 Position of Big Table: 0 - Statistics: - numRows: 28 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 28 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 28 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -175,8 +210,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 26 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -271,8 +305,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -338,8 +371,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -569,7 +601,54 @@ POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a. POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin)b.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -584,12 +663,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 26 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -597,33 +674,23 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col5 Position of Big Table: 1 - Statistics: - numRows: 28 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 28 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 28 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -658,8 +725,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 26 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -759,8 +825,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -841,8 +906,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/bucketmapjoin5.q.out ql/src/test/results/clientpositive/bucketmapjoin5.q.out index da826f5..d725c7d 100644 --- ql/src/test/results/clientpositive/bucketmapjoin5.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin5.q.out @@ -129,7 +129,54 @@ from srcbucket_mapjoin a join srcbucket_mapjoin_part b on a.key=b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -144,12 +191,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 110 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 110 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -157,33 +202,23 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col5 Position of Big Table: 1 - Statistics: - numRows: 121 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 121 Data size: 12786 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 121 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 121 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 121 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 121 Data size: 12786 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -213,8 +248,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 26 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -352,8 +386,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -419,8 +452,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -662,7 +694,54 @@ POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a. POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -677,12 +756,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 58 dataSize: 6124 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -690,33 +767,23 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col5 Position of Big Table: 1 - Statistics: - numRows: 63 dataSize: 6736 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 63 dataSize: 6736 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 63 dataSize: 6736 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -751,8 +818,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 26 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -895,8 +961,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -977,8 +1042,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/bucketmapjoin7.q.out ql/src/test/results/clientpositive/bucketmapjoin7.q.out index d978a4b..1665a41 100644 --- ql/src/test/results/clientpositive/bucketmapjoin7.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin7.q.out @@ -53,7 +53,64 @@ FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' LIMIT 1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL b) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_LIMIT 1))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + b + ds + '2008-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_LIMIT + 1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -62,12 +119,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 687 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -75,34 +130,26 @@ STAGE PLANS: condition expressions: 0 {key} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col7 Position of Big Table: 0 - Statistics: - numRows: 755 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 755 Data size: 3025 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col7 - type: string + expressions: _col0 (type: int), _col7 (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 755 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 755 Data size: 3025 Basic stats: COMPLETE Column stats: NONE Limit - Statistics: - numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -171,8 +218,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 26 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: diff --git ql/src/test/results/clientpositive/bucketmapjoin8.q.out ql/src/test/results/clientpositive/bucketmapjoin8.q.out index 0e8f7d4..79d4b8e 100644 --- ql/src/test/results/clientpositive/bucketmapjoin8.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin8.q.out @@ -61,7 +61,55 @@ FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' and b.part = '1' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + and + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '1' + = + . + TOK_TABLE_OR_COL + b + part + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -70,12 +118,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 687 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -83,33 +129,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) Position of Big Table: 0 - Statistics: - numRows: 755 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 755 Data size: 3025 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 755 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 755 Data size: 3025 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -162,8 +199,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 687 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -223,27 +259,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -306,7 +335,55 @@ FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' and b.part = '1' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + and + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '1' + = + . + TOK_TABLE_OR_COL + b + part + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -315,12 +392,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 687 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -328,33 +403,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) Position of Big Table: 0 - Statistics: - numRows: 755 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 755 Data size: 3025 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 755 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 755 Data size: 3025 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -407,8 +473,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 687 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -468,27 +533,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/bucketmapjoin9.q.out ql/src/test/results/clientpositive/bucketmapjoin9.q.out index 9ce7ac6..fd4f9f9 100644 --- ql/src/test/results/clientpositive/bucketmapjoin9.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin9.q.out @@ -67,7 +67,55 @@ FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' and b.part = '1' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + and + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '1' + = + . + TOK_TABLE_OR_COL + b + part + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -76,12 +124,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 687 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -89,32 +135,23 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) Position of Big Table: 0 - Statistics: - numRows: 1155 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1155 Data size: 4620 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: - numRows: 1155 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1155 Data size: 4620 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -167,8 +204,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 1050 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1050 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -221,27 +257,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -333,7 +362,55 @@ FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' AND b.part = '1' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '1' + = + . + TOK_TABLE_OR_COL + b + part + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -342,12 +419,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 687 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -355,32 +430,23 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) Position of Big Table: 0 - Statistics: - numRows: 755 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 755 Data size: 3025 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: - numRows: 755 dataSize: 3025 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 755 Data size: 3025 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -433,8 +499,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 687 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -487,27 +552,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out index bc4605b..cfff344 100644 --- ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out @@ -57,7 +57,61 @@ from srcbucket_mapjoin a join srcbucket_mapjoin_part b on a.key=b.key where b.ds="2008-04-08" POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -72,12 +126,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 26 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -85,32 +137,22 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col5 Position of Big Table: 0 - Statistics: - numRows: 44 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 44 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 44 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -182,8 +224,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 40 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -271,8 +312,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -338,8 +378,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out index 2518c6f..db9ee36 100644 --- ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out @@ -64,7 +64,54 @@ from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b on a.key=b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -79,12 +126,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 26 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -92,33 +137,23 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col5 Position of Big Table: 0 - Statistics: - numRows: 63 dataSize: 6736 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 63 dataSize: 6736 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 63 dataSize: 6736 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -231,8 +266,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 58 dataSize: 6124 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -327,8 +361,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -394,8 +427,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/bucketmapjoin_negative3.q.out ql/src/test/results/clientpositive/bucketmapjoin_negative3.q.out index 9229d37..b457bc4 100644 --- ql/src/test/results/clientpositive/bucketmapjoin_negative3.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin_negative3.q.out @@ -113,7 +113,50 @@ POSTHOOK: query: -- should be allowed explain extended select /* + MAPJOIN(R) */ * from test1 L join test1 R on L.key=R.key AND L.value=R.value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test1) L) (TOK_TABREF (TOK_TABNAME test1) R) (AND (= (. (TOK_TABLE_OR_COL L) key) (. (TOK_TABLE_OR_COL R) key)) (= (. (TOK_TABLE_OR_COL L) value) (. (TOK_TABLE_OR_COL R) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST R))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test1 + L + TOK_TABREF + TOK_TABNAME + test1 + R + AND + = + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + = + . + TOK_TABLE_OR_COL + L + value + . + TOK_TABLE_OR_COL + R + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -122,12 +165,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - l + Map Operator Tree: TableScan alias: l - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -135,35 +176,23 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[value]] - 1 [Column[key], Column[value]] + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -189,8 +218,7 @@ STAGE PLANS: r TableScan alias: r - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -260,7 +288,50 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test2 L join test2 R on L.key=R.key AND L.value=R.value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test2) L) (TOK_TABREF (TOK_TABNAME test2) R) (AND (= (. (TOK_TABLE_OR_COL L) key) (. (TOK_TABLE_OR_COL R) key)) (= (. (TOK_TABLE_OR_COL L) value) (. (TOK_TABLE_OR_COL R) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST R))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test2 + L + TOK_TABREF + TOK_TABNAME + test2 + R + AND + = + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + = + . + TOK_TABLE_OR_COL + L + value + . + TOK_TABLE_OR_COL + R + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -269,12 +340,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - l + Map Operator Tree: TableScan alias: l - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -282,35 +351,23 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[value]] - 1 [Column[key], Column[value]] + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -336,8 +393,7 @@ STAGE PLANS: r TableScan alias: r - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -409,7 +465,45 @@ POSTHOOK: query: -- should not apply bucket mapjoin explain extended select /* + MAPJOIN(R) */ * from test1 L join test1 R on L.key+L.key=R.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test1) L) (TOK_TABREF (TOK_TABNAME test1) R) (= (+ (. (TOK_TABLE_OR_COL L) key) (. (TOK_TABLE_OR_COL L) key)) (. (TOK_TABLE_OR_COL R) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST R))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test1 + L + TOK_TABREF + TOK_TABNAME + test1 + R + = + + + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -418,12 +512,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - l + Map Operator Tree: TableScan alias: l - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -431,34 +523,22 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [GenericUDFOPPlus(Column[key], Column[key])] - 1 [GenericUDFBridge(Column[key])] + 0 (key + key) (type: double) + 1 UDFToDouble(key) (type: double) outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -484,8 +564,7 @@ STAGE PLANS: r TableScan alias: r - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -548,7 +627,50 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test1 L join test2 R on L.key=R.key AND L.value=R.value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test1) L) (TOK_TABREF (TOK_TABNAME test2) R) (AND (= (. (TOK_TABLE_OR_COL L) key) (. (TOK_TABLE_OR_COL R) key)) (= (. (TOK_TABLE_OR_COL L) value) (. (TOK_TABLE_OR_COL R) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST R))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test1 + L + TOK_TABREF + TOK_TABNAME + test2 + R + AND + = + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + = + . + TOK_TABLE_OR_COL + L + value + . + TOK_TABLE_OR_COL + R + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -557,12 +679,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - l + Map Operator Tree: TableScan alias: l - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -570,34 +690,22 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[value]] - 1 [Column[key], Column[value]] + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -623,8 +731,7 @@ STAGE PLANS: r TableScan alias: r - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -687,7 +794,50 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test1 L join test3 R on L.key=R.key AND L.value=R.value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test1) L) (TOK_TABREF (TOK_TABNAME test3) R) (AND (= (. (TOK_TABLE_OR_COL L) key) (. (TOK_TABLE_OR_COL R) key)) (= (. (TOK_TABLE_OR_COL L) value) (. (TOK_TABLE_OR_COL R) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST R))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test1 + L + TOK_TABREF + TOK_TABNAME + test3 + R + AND + = + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + = + . + TOK_TABLE_OR_COL + L + value + . + TOK_TABLE_OR_COL + R + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -696,12 +846,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - l + Map Operator Tree: TableScan alias: l - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -709,34 +857,22 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[value]] - 1 [Column[key], Column[value]] + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -762,8 +898,7 @@ STAGE PLANS: r TableScan alias: r - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -826,7 +961,50 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test1 L join test4 R on L.key=R.key AND L.value=R.value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test1) L) (TOK_TABREF (TOK_TABNAME test4) R) (AND (= (. (TOK_TABLE_OR_COL L) key) (. (TOK_TABLE_OR_COL R) key)) (= (. (TOK_TABLE_OR_COL L) value) (. (TOK_TABLE_OR_COL R) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST R))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test1 + L + TOK_TABREF + TOK_TABNAME + test4 + R + AND + = + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + = + . + TOK_TABLE_OR_COL + L + value + . + TOK_TABLE_OR_COL + R + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -835,12 +1013,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - l + Map Operator Tree: TableScan alias: l - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -848,34 +1024,22 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[value]] - 1 [Column[key], Column[value]] + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -901,8 +1065,7 @@ STAGE PLANS: r TableScan alias: r - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -965,7 +1128,50 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test2 L join test3 R on L.key=R.key AND L.value=R.value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test2) L) (TOK_TABREF (TOK_TABNAME test3) R) (AND (= (. (TOK_TABLE_OR_COL L) key) (. (TOK_TABLE_OR_COL R) key)) (= (. (TOK_TABLE_OR_COL L) value) (. (TOK_TABLE_OR_COL R) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST R))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test2 + L + TOK_TABREF + TOK_TABNAME + test3 + R + AND + = + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + = + . + TOK_TABLE_OR_COL + L + value + . + TOK_TABLE_OR_COL + R + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -974,12 +1180,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - l + Map Operator Tree: TableScan alias: l - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -987,34 +1191,22 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[value]] - 1 [Column[key], Column[value]] + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1040,8 +1232,7 @@ STAGE PLANS: r TableScan alias: r - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -1104,7 +1295,50 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test2 L join test4 R on L.key=R.key AND L.value=R.value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test2) L) (TOK_TABREF (TOK_TABNAME test4) R) (AND (= (. (TOK_TABLE_OR_COL L) key) (. (TOK_TABLE_OR_COL R) key)) (= (. (TOK_TABLE_OR_COL L) value) (. (TOK_TABLE_OR_COL R) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST R))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test2 + L + TOK_TABREF + TOK_TABNAME + test4 + R + AND + = + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + = + . + TOK_TABLE_OR_COL + L + value + . + TOK_TABLE_OR_COL + R + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1113,12 +1347,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - l + Map Operator Tree: TableScan alias: l - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -1126,34 +1358,22 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[value]] - 1 [Column[key], Column[value]] + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1179,8 +1399,7 @@ STAGE PLANS: r TableScan alias: r - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -1243,7 +1462,50 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test3 L join test4 R on L.key=R.key AND L.value=R.value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test3) L) (TOK_TABREF (TOK_TABNAME test4) R) (AND (= (. (TOK_TABLE_OR_COL L) key) (. (TOK_TABLE_OR_COL R) key)) (= (. (TOK_TABLE_OR_COL L) value) (. (TOK_TABLE_OR_COL R) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST R))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test3 + L + TOK_TABREF + TOK_TABNAME + test4 + R + AND + = + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + = + . + TOK_TABLE_OR_COL + L + value + . + TOK_TABLE_OR_COL + R + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1252,12 +1514,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - l + Map Operator Tree: TableScan alias: l - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -1265,34 +1525,22 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[value]] - 1 [Column[key], Column[value]] + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 23 dataSize: 4620 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 23 Data size: 4620 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1318,8 +1566,7 @@ STAGE PLANS: r TableScan alias: r - Statistics: - numRows: 21 dataSize: 4200 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out index 0440bdd..1aea8c1 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out @@ -46,9 +46,6 @@ SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -57,20 +54,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -177,9 +171,6 @@ POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSch POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -188,20 +179,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -320,9 +308,6 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldS POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL x) value)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -331,20 +316,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: concat(value, value) - type: string + expressions: key (type: int), concat(value, value) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -388,9 +370,6 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldS POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL x) key))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -399,36 +378,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key + key) - type: int - expr: value - type: string + expressions: (key + key) (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -472,9 +441,6 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldS POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) v1)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) k1)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL x) v1) (. (TOK_TABLE_OR_COL x) v1)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -483,20 +449,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: concat(value, value) - type: string + expressions: key (type: int), concat(value, value) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out index c69811f..415fbd9 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out @@ -105,9 +105,6 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSch POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -116,32 +113,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col6) - type: string + expressions: _col0 (type: int), concat(_col1, _col6) (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -281,9 +271,6 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSch POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) ds)) (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) - STAGE DEPENDENCIES: Stage-6 is a root stage , consists of Stage-4, Stage-5, Stage-1 Stage-4 has a backup stage: Stage-1 @@ -298,8 +285,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -308,33 +294,18 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col6) - type: string + expressions: _col0 (type: int), concat(_col1, _col6) (type: string) outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -349,7 +320,6 @@ STAGE PLANS: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -373,8 +343,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -383,33 +352,18 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col6) - type: string + expressions: _col0 (type: int), concat(_col1, _col6) (type: string) outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -424,7 +378,6 @@ STAGE PLANS: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -433,48 +386,32 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col6) - type: string + expressions: _col0 (type: int), concat(_col1, _col6) (type: string) outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -631,9 +568,6 @@ POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldS POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) ds)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -642,32 +576,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col6) - type: string + expressions: _col0 (type: int), concat(_col1, _col6) (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -851,9 +778,6 @@ POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldS POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -862,39 +786,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:test_table1 + Map Operator Tree: TableScan alias: test_table1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col3) - type: string + expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1068,9 +982,6 @@ POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldS POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL value)) v1)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL value)) v2)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) v1) (. (TOK_TABLE_OR_COL b) v2)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1079,39 +990,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:test_table1 + Map Operator Tree: TableScan alias: test_table1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: concat(value, value) - type: string + expressions: key (type: int), concat(value, value) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col3) - type: string + expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1293,9 +1194,6 @@ POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_tab POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL a) key))) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))))) - STAGE DEPENDENCIES: Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 Stage-7 has a backup stage: Stage-1 @@ -1321,34 +1219,23 @@ STAGE PLANS: TableScan alias: test_table2 Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 HashTable Sink Operator condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a:test_table1 + Map Operator Tree: TableScan alias: test_table1 Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Map Join Operator condition map: @@ -1356,40 +1243,24 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: (_col0 + _col0) - type: int - expr: concat(_col1, _col3) - type: string + expressions: (_col0 + _col0) (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Local Work: Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1422,34 +1293,23 @@ STAGE PLANS: TableScan alias: test_table1 Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 HashTable Sink Operator condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 _col0 (type: int) Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - b:test_table2 + Map Operator Tree: TableScan alias: test_table2 Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Map Join Operator condition map: @@ -1457,40 +1317,24 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 1 Select Operator - expressions: - expr: (_col0 + _col0) - type: int - expr: concat(_col1, _col3) - type: string + expressions: (_col0 + _col0) (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Local Work: Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1499,55 +1343,36 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:test_table1 + Map Operator Tree: TableScan alias: test_table1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: (_col0 + _col0) - type: int - expr: concat(_col1, _col3) - type: string + expressions: (_col0 + _col0) (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out index f487b28..0b79985 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out @@ -44,9 +44,6 @@ SELECT x.value, x.key from POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -55,20 +52,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: int + expressions: value (type: string), key (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -182,9 +176,6 @@ POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSch POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -193,43 +184,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToInteger(_col1) - type: int + key expressions: UDFToInteger(_col1) (type: int) sort order: + - Map-reduce partition columns: - expr: UDFToInteger(_col1) - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: UDFToInteger(_col1) (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out index 745fbf2..6e0e3be 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out @@ -67,9 +67,6 @@ POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSch POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -78,34 +75,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col0 - type: int - expr: concat(_col1, _col6) - type: string + expressions: _col0 (type: int), _col0 (type: int), concat(_col1, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -262,9 +250,6 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchem POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) - STAGE DEPENDENCIES: Stage-6 is a root stage , consists of Stage-4, Stage-5, Stage-1 Stage-4 has a backup stage: Stage-1 @@ -279,8 +264,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -289,33 +273,18 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: int), _col1 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -330,7 +299,6 @@ STAGE PLANS: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -354,8 +322,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -364,33 +331,18 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: int), _col1 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -405,7 +357,6 @@ STAGE PLANS: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -414,48 +365,32 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out index 404ab41..0a34f9d 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out @@ -67,9 +67,6 @@ POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSch POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) - STAGE DEPENDENCIES: Stage-6 is a root stage , consists of Stage-4, Stage-5, Stage-1 Stage-4 has a backup stage: Stage-1 @@ -84,8 +81,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -94,33 +90,18 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col6) - type: string + expressions: _col0 (type: int), concat(_col1, _col6) (type: string) outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: - - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -135,7 +116,6 @@ STAGE PLANS: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -159,8 +139,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -169,33 +148,18 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col6) - type: string + expressions: _col0 (type: int), concat(_col1, _col6) (type: string) outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: - - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -210,7 +174,6 @@ STAGE PLANS: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -219,48 +182,32 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col6) - type: string + expressions: _col0 (type: int), concat(_col1, _col6) (type: string) outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: - - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -373,9 +320,6 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSch POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))))) - STAGE DEPENDENCIES: Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 Stage-7 has a backup stage: Stage-1 @@ -401,34 +345,23 @@ STAGE PLANS: TableScan alias: test_table2 Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 HashTable Sink Operator condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 _col0 (type: int) Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a:test_table1 + Map Operator Tree: TableScan alias: test_table1 Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Map Join Operator condition map: @@ -436,40 +369,24 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col3) - type: string + expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: - - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Local Work: Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -502,34 +419,23 @@ STAGE PLANS: TableScan alias: test_table1 Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 HashTable Sink Operator condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 _col0 (type: int) Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - b:test_table2 + Map Operator Tree: TableScan alias: test_table2 Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Map Join Operator condition map: @@ -537,40 +443,24 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col3) - type: string + expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: - - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Local Work: Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -579,55 +469,36 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:test_table1 + Map Operator Tree: TableScan alias: test_table1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col3) - type: string + expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: - - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out index f3a86ed..2fda040 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out @@ -72,9 +72,6 @@ POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchem POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key2)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -83,34 +80,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 91 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {key2} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[key2]] - 1 [Column[key], Column[key2]] + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) outputColumnNames: _col0, _col1, _col2, _col8 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: concat(_col2, _col8) - type: string + expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col8) (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -252,9 +240,6 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchem POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key2)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)) value)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -263,34 +248,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 91 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {key2} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[key2]] - 1 [Column[key], Column[key2]] + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) outputColumnNames: _col0, _col1, _col2, _col8 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: concat(_col2, _col8) - type: string + expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col8) (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -444,9 +420,6 @@ POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a. POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) - STAGE DEPENDENCIES: Stage-6 is a root stage , consists of Stage-4, Stage-5, Stage-1 Stage-4 has a backup stage: Stage-1 @@ -461,8 +434,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -471,41 +443,18 @@ STAGE PLANS: condition expressions: 0 {key} {key2} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[key2]] - 1 [Column[key], Column[key2]] + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) outputColumnNames: _col0, _col1, _col2, _col8 - Position of Big Table: 0 Select Operator - expressions: - expr: _col1 - type: int - expr: _col0 - type: int - expr: concat(_col2, _col8) - type: string + expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col8) (type: string) outputColumnNames: _col0, _col1, _col2 Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: +- - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -520,7 +469,6 @@ STAGE PLANS: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -544,8 +492,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -554,41 +501,18 @@ STAGE PLANS: condition expressions: 0 {key} {key2} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[key2]] - 1 [Column[key], Column[key2]] + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) outputColumnNames: _col0, _col1, _col2, _col8 - Position of Big Table: 1 Select Operator - expressions: - expr: _col1 - type: int - expr: _col0 - type: int - expr: concat(_col2, _col8) - type: string + expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col8) (type: string) outputColumnNames: _col0, _col1, _col2 Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: +- - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -603,7 +527,6 @@ STAGE PLANS: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -612,56 +535,32 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 91 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {key2} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[key2]] - 1 [Column[key], Column[key2]] + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) outputColumnNames: _col0, _col1, _col2, _col8 - Position of Big Table: 0 Select Operator - expressions: - expr: _col1 - type: int - expr: _col0 - type: int - expr: concat(_col2, _col8) - type: string + expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col8) (type: string) outputColumnNames: _col0, _col1, _col2 Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: +- - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -702,9 +601,6 @@ POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a. POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key2)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)) value)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value))))) - STAGE DEPENDENCIES: Stage-6 is a root stage , consists of Stage-4, Stage-5, Stage-1 Stage-4 has a backup stage: Stage-1 @@ -719,8 +615,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -729,41 +624,18 @@ STAGE PLANS: condition expressions: 0 {key} {key2} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[key2]] - 1 [Column[key], Column[key2]] + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) outputColumnNames: _col0, _col1, _col2, _col8 - Position of Big Table: 0 Select Operator - expressions: - expr: _col1 - type: int - expr: _col0 - type: int - expr: concat(_col2, _col8) - type: string + expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col8) (type: string) outputColumnNames: _col0, _col1, _col2 Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: +- - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -778,7 +650,6 @@ STAGE PLANS: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -802,8 +673,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - subq1:b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -812,41 +682,18 @@ STAGE PLANS: condition expressions: 0 {key} {key2} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[key2]] - 1 [Column[key], Column[key2]] + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) outputColumnNames: _col0, _col1, _col2, _col8 - Position of Big Table: 1 Select Operator - expressions: - expr: _col1 - type: int - expr: _col0 - type: int - expr: concat(_col2, _col8) - type: string + expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col8) (type: string) outputColumnNames: _col0, _col1, _col2 Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: +- - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -861,7 +708,6 @@ STAGE PLANS: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -870,56 +716,32 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 91 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {key2} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[key2]] - 1 [Column[key], Column[key2]] + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) outputColumnNames: _col0, _col1, _col2, _col8 - Position of Big Table: 0 Select Operator - expressions: - expr: _col1 - type: int - expr: _col0 - type: int - expr: concat(_col2, _col8) - type: string + expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col8) (type: string) outputColumnNames: _col0, _col1, _col2 Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: +- - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -966,9 +788,6 @@ POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a. POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key2)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)) value)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value))))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -977,34 +796,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 91 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {key2} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[key2]] - 1 [Column[key], Column[key2]] + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) outputColumnNames: _col0, _col1, _col2, _col8 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: concat(_col2, _col8) - type: string + expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col8) (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1188,9 +998,6 @@ POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a. POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key2)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)) value)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key2) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key) k2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value))))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) k2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) k1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1199,34 +1006,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 91 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {key2} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[key2]] - 1 [Column[key], Column[key2]] + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) outputColumnNames: _col0, _col1, _col2, _col8 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: concat(_col2, _col8) - type: string + expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col8) (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1447,9 +1245,6 @@ POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a. POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key2)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)) value)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key2) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key) k2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value))))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table4) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) k2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) k1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) value))))) - STAGE DEPENDENCIES: Stage-6 is a root stage , consists of Stage-4, Stage-5, Stage-1 Stage-4 has a backup stage: Stage-1 @@ -1464,8 +1259,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:a + Map Operator Tree: TableScan alias: a Map Join Operator @@ -1474,41 +1268,18 @@ STAGE PLANS: condition expressions: 0 {key} {key2} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[key2]] - 1 [Column[key], Column[key2]] + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) outputColumnNames: _col0, _col1, _col2, _col8 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: concat(_col2, _col8) - type: string + expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col8) (type: string) outputColumnNames: _col0, _col1, _col2 Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: -- - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -1523,7 +1294,6 @@ STAGE PLANS: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1547,8 +1317,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:b + Map Operator Tree: TableScan alias: b Map Join Operator @@ -1557,41 +1326,18 @@ STAGE PLANS: condition expressions: 0 {key} {key2} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[key2]] - 1 [Column[key], Column[key2]] + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) outputColumnNames: _col0, _col1, _col2, _col8 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: concat(_col2, _col8) - type: string + expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col8) (type: string) outputColumnNames: _col0, _col1, _col2 Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: -- - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -1606,7 +1352,6 @@ STAGE PLANS: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1615,56 +1360,32 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 91 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {key2} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[key2]] - 1 [Column[key], Column[key2]] + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) outputColumnNames: _col0, _col1, _col2, _col8 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: concat(_col2, _col8) - type: string + expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col8) (type: string) outputColumnNames: _col0, _col1, _col2 Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: -- - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out index a40c985..0b0b79f 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out @@ -69,9 +69,6 @@ POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSch POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1')) (or (= (. (TOK_TABLE_OR_COL a) key) 0) (= (. (TOK_TABLE_OR_COL a) key) 5)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -80,36 +77,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key = 0) or (key = 5)) - type: boolean + predicate: ((key = 0) or (key = 5)) (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col6) - type: string + expressions: _col0 (type: int), concat(_col1, _col6) (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -235,9 +224,6 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSch POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (or (= (TOK_TABLE_OR_COL key) 0) (= (TOK_TABLE_OR_COL key) 5)))))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (or (= (TOK_TABLE_OR_COL key) 0) (= (TOK_TABLE_OR_COL key) 5)))))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -246,43 +232,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:test_table1 + Map Operator Tree: TableScan alias: test_table1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key = 0) or (key = 5)) - type: boolean + predicate: ((key = 0) or (key = 5)) (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col3) - type: string + expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -422,9 +397,6 @@ POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldS POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (< (TOK_TABLE_OR_COL key) 8))))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (< (TOK_TABLE_OR_COL key) 8))))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (or (= (. (TOK_TABLE_OR_COL a) key) 0) (= (. (TOK_TABLE_OR_COL a) key) 5))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -433,43 +405,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:test_table1 + Map Operator Tree: TableScan alias: test_table1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 8) and ((key = 0) or (key = 5))) - type: boolean + predicate: ((key < 8) and ((key = 0) or (key = 5))) (type: boolean) + Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col3) - type: string + expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out index 2f49a64..4401ddc 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out @@ -67,9 +67,6 @@ POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSch POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -78,34 +75,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col5 - type: int - expr: concat(_col1, _col6) - type: string + expressions: _col0 (type: int), _col5 (type: int), concat(_col1, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -233,9 +221,6 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchem POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table2)b.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -244,34 +229,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Position of Big Table: 0 Select Operator - expressions: - expr: _col5 - type: int - expr: _col0 - type: int - expr: concat(_col1, _col6) - type: string + expressions: _col5 (type: int), _col0 (type: int), concat(_col1, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/case_sensitivity.q.out ql/src/test/results/clientpositive/case_sensitivity.q.out index c967c94..5c66431 100644 --- ql/src/test/results/clientpositive/case_sensitivity.q.out +++ ql/src/test/results/clientpositive/case_sensitivity.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM SRC_THRIFT INSERT OVERWRITE TABLE dest1 SELECT src_Thrift.LINT[1], src_thrift.lintstring[0].MYSTRING where src_thrift.liNT[0] > 0 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC_THRIFT))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR ([ (. (TOK_TABLE_OR_COL src_Thrift) LINT) 1)) (TOK_SELEXPR (. ([ (. (TOK_TABLE_OR_COL src_thrift) lintstring) 0) MYSTRING))) (TOK_WHERE (> ([ (. (TOK_TABLE_OR_COL src_thrift) liNT) 0) 0)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -27,24 +24,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_thrift + Map Operator Tree: TableScan alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (lint[0] > 0) - type: boolean + predicate: (lint[0] > 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: lint[1] - type: int - expr: lintstring[0].MYSTRING - type: string + expressions: lint[1] (type: int), lintstring[0].MYSTRING (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -75,12 +68,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -89,12 +80,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/cast1.q.out ql/src/test/results/clientpositive/cast1.q.out index dd6e6ef..3683de2 100644 --- ql/src/test/results/clientpositive/cast1.q.out +++ ql/src/test/results/clientpositive/cast1.q.out @@ -9,9 +9,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT 3 + 2, 3.0 + 2, 3 + 2.0, 3.0 + 2.0, 3 + CAST(2.0 AS INT) + CAST(CAST(0 AS SMALLINT) AS INT), CAST(1 AS BOOLEAN), CAST(TRUE AS INT) WHERE src.key = 86 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (+ 3 2)) (TOK_SELEXPR (+ 3.0 2)) (TOK_SELEXPR (+ 3 2.0)) (TOK_SELEXPR (+ 3.0 2.0)) (TOK_SELEXPR (+ (+ 3 (TOK_FUNCTION TOK_INT 2.0)) (TOK_FUNCTION TOK_INT (TOK_FUNCTION TOK_SMALLINT 0)))) (TOK_SELEXPR (TOK_FUNCTION TOK_BOOLEAN 1)) (TOK_SELEXPR (TOK_FUNCTION TOK_INT TRUE))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) key) 86)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -25,34 +22,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (3 + 2) - type: int - expr: (3.0 + 2) - type: double - expr: (3 + 2.0) - type: double - expr: (3.0 + 2.0) - type: double - expr: ((3 + UDFToInteger(2.0)) + UDFToInteger(UDFToShort(0))) - type: int - expr: UDFToBoolean(1) - type: boolean - expr: UDFToInteger(true) - type: int + expressions: (3 + 2) (type: int), (3.0 + 2) (type: double), (3 + 2.0) (type: double), (3.0 + 2.0) (type: double), ((3 + UDFToInteger(2.0)) + UDFToInteger(UDFToShort(0))) (type: int), UDFToBoolean(1) (type: boolean), UDFToInteger(true) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -83,12 +66,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -97,12 +78,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/cluster.q.out ql/src/test/results/clientpositive/cluster.q.out index 8d14a1d..0cd0886 100644 --- ql/src/test/results/clientpositive/cluster.q.out +++ ql/src/test/results/clientpositive/cluster.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM SRC x where x.key = 10 CLUSTER BY x.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 10)) (TOK_CLUSTERBY (. (TOK_TABLE_OR_COL x) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,40 +11,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 10) - type: boolean + predicate: (key = 10) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -72,9 +58,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM SRC x where x.key = 20 CLUSTER BY key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -82,40 +65,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -140,9 +112,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.* FROM SRC x where x.key = 20 CLUSTER BY key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -150,40 +119,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -208,9 +166,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.* FROM SRC x where x.key = 20 CLUSTER BY x.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_CLUSTERBY (. (TOK_TABLE_OR_COL x) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -218,40 +173,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -276,9 +220,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.key, x.value as v1 FROM SRC x where x.key = 20 CLUSTER BY key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) v1)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -286,40 +227,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -344,9 +274,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.key, x.value as v1 FROM SRC x where x.key = 20 CLUSTER BY x.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) v1)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_CLUSTERBY (. (TOK_TABLE_OR_COL x) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -354,40 +281,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -412,9 +328,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.key, x.value as v1 FROM SRC x where x.key = 20 CLUSTER BY v1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) v1)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL v1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -422,40 +335,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -480,9 +382,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT y.* from (SELECT x.* FROM SRC x CLUSTER BY x.key) y where y.key = 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))) (TOK_CLUSTERBY (. (TOK_TABLE_OR_COL x) key)))) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME y)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL y) key) 20)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -490,47 +389,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - y:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 = 20) - type: boolean + predicate: (_col0 = 20) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -555,9 +440,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.key, x.value as v1, y.key FROM SRC x JOIN SRC y ON (x.key = y.key) where x.key = 20 CLUSTER BY v1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME SRC) x) (TOK_TABREF (TOK_TABNAME SRC) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) v1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL v1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -566,47 +448,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: y + alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -614,20 +480,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -635,30 +495,20 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -683,9 +533,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.key, x.value as v1, y.* FROM SRC x JOIN SRC y ON (x.key = y.key) where x.key = 20 CLUSTER BY v1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME SRC) x) (TOK_TABREF (TOK_TABNAME SRC) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) v1) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME y)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL v1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -694,49 +541,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: y + alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -744,22 +573,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -767,32 +588,20 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -817,9 +626,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.key, x.value as v1, y.* FROM SRC x JOIN SRC y ON (x.key = y.key) where x.key = 20 CLUSTER BY x.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME SRC) x) (TOK_TABREF (TOK_TABNAME SRC) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) v1) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME y)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_CLUSTERBY (. (TOK_TABLE_OR_COL x) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -828,49 +634,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: y + alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -878,22 +666,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -901,32 +681,20 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -951,9 +719,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.key, x.value as v1, y.key as yk FROM SRC x JOIN SRC y ON (x.key = y.key) where x.key = 20 CLUSTER BY key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME SRC) x) (TOK_TABREF (TOK_TABNAME SRC) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) v1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) yk)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -962,47 +727,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: y + alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1010,20 +759,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1031,30 +774,20 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1091,9 +824,6 @@ FROM ( ) unioninput CLUSTER BY unioninput.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL src) key) 100)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) 100))))) unioninput)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME unioninput)))) (TOK_CLUSTERBY (. (TOK_TABLE_OR_COL unioninput) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1101,84 +831,57 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:unioninput-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 18 Data size: 3606 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 18 Data size: 3606 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - null-subquery2:unioninput-subquery2:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 3606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 100) - type: boolean + predicate: (key > 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 18 Data size: 3606 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 18 Data size: 3606 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 3606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 18 Data size: 3606 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 18 Data size: 3606 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/column_access_stats.q.out ql/src/test/results/clientpositive/column_access_stats.q.out index c191ae7..dea25e5 100644 --- ql/src/test/results/clientpositive/column_access_stats.q.out +++ ql/src/test/results/clientpositive/column_access_stats.q.out @@ -72,9 +72,6 @@ Columns:val PREHOOK: query: -- More complicated select queries EXPLAIN SELECT key FROM (SELECT key, val FROM T1) subq1 ORDER BY key PREHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -82,29 +79,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -129,9 +122,6 @@ Columns:key 8 PREHOOK: query: EXPLAIN SELECT k FROM (SELECT key as k, val as v FROM T1) subq1 ORDER BY k PREHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) k) (TOK_SELEXPR (TOK_TABLE_OR_COL val) v)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL k))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -139,29 +129,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -218,9 +204,6 @@ SELECT key as c FROM T1 SELECT val as c FROM T1 ) subq1 ORDER BY c PREHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) c)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL val) c))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL c))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -228,58 +211,49 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:subq1-subquery1:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Union + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - null-subquery2:subq1-subquery2:t1 + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string) TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: val - type: string + expressions: val (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Union + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -319,9 +293,6 @@ SELECT key as c FROM T1 SELECT key as c FROM T1 ) subq1 ORDER BY c PREHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) c)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) c))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL c))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -329,58 +300,49 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:subq1-subquery1:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Union + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - null-subquery2:subq1-subquery2:t1 + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string) TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Union + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -451,9 +413,6 @@ FROM T1 JOIN T2 ON T1.key = T2.key ORDER BY T1.key PREHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1)) (TOK_TABREF (TOK_TABNAME T2)) (= (. (TOK_TABLE_OR_COL T1) key) (. (TOK_TABLE_OR_COL T2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL T1) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL T1) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -462,34 +421,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan - alias: t1 + alias: t2 + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - t2 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE TableScan - alias: t2 + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -497,16 +446,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -514,23 +461,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -602,9 +545,6 @@ FROM T1 JOIN T2 ON T1.key = T2.key AND T1.val = 3 and T2.val = 3 ORDER BY T1.key, T1.val PREHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1)) (TOK_TABREF (TOK_TABNAME T2)) (and (AND (= (. (TOK_TABLE_OR_COL T1) key) (. (TOK_TABLE_OR_COL T2) key)) (= (. (TOK_TABLE_OR_COL T1) val) 3)) (= (. (TOK_TABLE_OR_COL T2) val) 3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL T1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL T1) val))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -613,49 +553,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan - alias: t1 + alias: t2 + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (val = 3) - type: boolean + predicate: (val = 3) (type: boolean) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - t2 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: t2 + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (val = 3) - type: boolean + predicate: (val = 3) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -663,22 +585,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -686,31 +600,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -747,9 +649,6 @@ JOIN ON subq1.val = subq2.val ORDER BY subq1.val PREHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL val))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 5)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL val))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) val) (. (TOK_TABLE_OR_COL subq2) val)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) val))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq1) val))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -758,52 +657,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:t1 + Map Operator Tree: TableScan - alias: t1 + alias: t2 + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 5) - type: boolean + predicate: (key = 6) (type: boolean) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: val - type: string + expressions: val (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - subq2:t2 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE TableScan - alias: t2 + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = 6) - type: boolean + predicate: (key = 5) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: val - type: string + expressions: val (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -811,16 +696,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -828,23 +711,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -895,9 +774,6 @@ JOIN T3 ON T3.key = T4.key ORDER BY T3.key, T4.key PREHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 'teststring' val)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key) key)))) T4) (TOK_TABREF (TOK_TABNAME T3)) (= (. (TOK_TABLE_OR_COL T3) key) (. (TOK_TABLE_OR_COL T4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL T3) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL T4) key))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -907,44 +783,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - t4:subq1:t1 + Map Operator Tree: TableScan - alias: t1 + alias: t2 + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - t4:subq2:t2 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE TableScan - alias: t2 + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -952,16 +816,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -969,38 +831,23 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: t3 + Statistics: Num rows: 5 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - t3 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 5 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: t3 Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1008,20 +855,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 17 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 17 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1029,29 +870,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string - expr: _col0 - type: string + key expressions: _col1 (type: string), _col0 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + Statistics: Num rows: 5 Data size: 17 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 5 Data size: 17 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 5 Data size: 17 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out index c7aa9ac..f5ad951 100644 --- ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out +++ ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src_thrift INSERT OVERWRITE TABLE columnarserde_create_shortcut SELECT src_thrift.lint, src_thrift.lstring, src_thrift.mstringstring, src_thrift.aint, src_thrift.astring DISTRIBUTE BY 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME columnarserde_create_shortcut))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) lint)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) lstring)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) mstringstring)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) aint)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) astring))) (TOK_DISTRIBUTEBY 1))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,45 +19,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_thrift + Map Operator Tree: TableScan alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: lint - type: array - expr: lstring - type: array - expr: mstringstring - type: map - expr: aint - type: int - expr: astring - type: string + expressions: lint (type: array), lstring (type: array), mstringstring (type: map), aint (type: int), astring (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: 1 - type: int - tag: -1 - value expressions: - expr: _col0 - type: array - expr: _col1 - type: array - expr: _col2 - type: map - expr: _col3 - type: int - expr: _col4 - type: string + Map-reduce partition columns: 1 (type: int) + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: array), _col1 (type: array), _col2 (type: map), _col3 (type: int), _col4 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat diff --git ql/src/test/results/clientpositive/columnstats_partlvl.q.out ql/src/test/results/clientpositive/columnstats_partlvl.q.out index a7b8df7..97a37eb 100644 --- ql/src/test/results/clientpositive/columnstats_partlvl.q.out +++ ql/src/test/results/clientpositive/columnstats_partlvl.q.out @@ -29,9 +29,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME Employee_Part) (TOK_PARTSPEC (TOK_PARTVAL employeeSalary 2000.0))) (TOK_TABCOLNAME employeeID)) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage @@ -39,42 +36,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - employee_part + Map Operator Tree: TableScan alias: employee_part Select Operator - expressions: - expr: employeeid - type: int + expressions: employeeid (type: int) outputColumnNames: employeeid Group By Operator - aggregations: - expr: compute_stats(employeeid, 16) - bucketGroup: false + aggregations: compute_stats(employeeid, 16) mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct + value expressions: _col0 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: compute_stats(VALUE._col0) - bucketGroup: false + aggregations: compute_stats(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: struct + expressions: _col0 (type: struct) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -95,7 +79,18 @@ POSTHOOK: query: explain extended analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME Employee_Part) (TOK_PARTSPEC (TOK_PARTVAL employeeSalary 2000.0))) (TOK_TABCOLNAME employeeID)) + +TOK_ANALYZE + TOK_TAB + TOK_TABNAME + Employee_Part + TOK_PARTSPEC + TOK_PARTVAL + employeeSalary + 2000.0 + TOK_TABCOLNAME + employeeID + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -104,28 +99,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - employee_part + Map Operator Tree: TableScan alias: employee_part GatherStats: false Select Operator - expressions: - expr: employeeid - type: int + expressions: employeeid (type: int) outputColumnNames: employeeid Group By Operator - aggregations: - expr: compute_stats(employeeid, 16) - bucketGroup: false + aggregations: compute_stats(employeeid, 16) mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: struct + value expressions: _col0 (type: struct) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -177,15 +165,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: compute_stats(VALUE._col0) - bucketGroup: false + aggregations: compute_stats(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: struct + expressions: _col0 (type: struct) outputColumnNames: _col0 File Output Operator compressed: false @@ -233,9 +217,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME Employee_Part) (TOK_PARTSPEC (TOK_PARTVAL employeeSalary 4000.0))) (TOK_TABCOLNAME employeeID)) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage @@ -243,42 +224,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - employee_part + Map Operator Tree: TableScan alias: employee_part Select Operator - expressions: - expr: employeeid - type: int + expressions: employeeid (type: int) outputColumnNames: employeeid Group By Operator - aggregations: - expr: compute_stats(employeeid, 16) - bucketGroup: false + aggregations: compute_stats(employeeid, 16) mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct + value expressions: _col0 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: compute_stats(VALUE._col0) - bucketGroup: false + aggregations: compute_stats(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: struct + expressions: _col0 (type: struct) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -299,7 +267,18 @@ POSTHOOK: query: explain extended analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME Employee_Part) (TOK_PARTSPEC (TOK_PARTVAL employeeSalary 4000.0))) (TOK_TABCOLNAME employeeID)) + +TOK_ANALYZE + TOK_TAB + TOK_TABNAME + Employee_Part + TOK_PARTSPEC + TOK_PARTVAL + employeeSalary + 4000.0 + TOK_TABCOLNAME + employeeID + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -308,28 +287,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - employee_part + Map Operator Tree: TableScan alias: employee_part GatherStats: false Select Operator - expressions: - expr: employeeid - type: int + expressions: employeeid (type: int) outputColumnNames: employeeid Group By Operator - aggregations: - expr: compute_stats(employeeid, 16) - bucketGroup: false + aggregations: compute_stats(employeeid, 16) mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: struct + value expressions: _col0 (type: struct) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -381,15 +353,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: compute_stats(VALUE._col0) - bucketGroup: false + aggregations: compute_stats(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: struct + expressions: _col0 (type: struct) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/columnstats_tbllvl.q.out ql/src/test/results/clientpositive/columnstats_tbllvl.q.out index 5029b7d..eb1613b 100644 --- ql/src/test/results/clientpositive/columnstats_tbllvl.q.out +++ ql/src/test/results/clientpositive/columnstats_tbllvl.q.out @@ -39,9 +39,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME UserVisits_web_text_none)) (TOK_TABCOLNAME sourceIP avgTimeOnSite adRevenue)) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage @@ -49,58 +46,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - uservisits_web_text_none + Map Operator Tree: TableScan alias: uservisits_web_text_none Select Operator - expressions: - expr: sourceip - type: string - expr: avgtimeonsite - type: int - expr: adrevenue - type: float + expressions: sourceip (type: string), avgtimeonsite (type: int), adrevenue (type: float) outputColumnNames: sourceip, avgtimeonsite, adrevenue Group By Operator - aggregations: - expr: compute_stats(sourceip, 16) - expr: compute_stats(avgtimeonsite, 16) - expr: compute_stats(adrevenue, 16) - bucketGroup: false + aggregations: compute_stats(sourceip, 16), compute_stats(avgtimeonsite, 16), compute_stats(adrevenue, 16) mode: hash outputColumnNames: _col0, _col1, _col2 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct - expr: _col1 - type: struct - expr: _col2 - type: struct + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: compute_stats(VALUE._col0) - expr: compute_stats(VALUE._col1) - expr: compute_stats(VALUE._col2) - bucketGroup: false + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Select Operator - expressions: - expr: _col0 - type: struct - expr: _col1 - type: struct - expr: _col2 - type: struct + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -120,7 +88,16 @@ POSTHOOK: query: explain extended analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME UserVisits_web_text_none)) (TOK_TABCOLNAME sourceIP avgTimeOnSite adRevenue)) + +TOK_ANALYZE + TOK_TAB + TOK_TABNAME + UserVisits_web_text_none + TOK_TABCOLNAME + sourceIP + avgTimeOnSite + adRevenue + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -129,38 +106,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - uservisits_web_text_none + Map Operator Tree: TableScan alias: uservisits_web_text_none GatherStats: false Select Operator - expressions: - expr: sourceip - type: string - expr: avgtimeonsite - type: int - expr: adrevenue - type: float + expressions: sourceip (type: string), avgtimeonsite (type: int), adrevenue (type: float) outputColumnNames: sourceip, avgtimeonsite, adrevenue Group By Operator - aggregations: - expr: compute_stats(sourceip, 16) - expr: compute_stats(avgtimeonsite, 16) - expr: compute_stats(adrevenue, 16) - bucketGroup: false + aggregations: compute_stats(sourceip, 16), compute_stats(avgtimeonsite, 16), compute_stats(adrevenue, 16) mode: hash outputColumnNames: _col0, _col1, _col2 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: struct - expr: _col1 - type: struct - expr: _col2 - type: struct + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -213,21 +173,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: compute_stats(VALUE._col0) - expr: compute_stats(VALUE._col1) - expr: compute_stats(VALUE._col2) - bucketGroup: false + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Select Operator - expressions: - expr: _col0 - type: struct - expr: _col1 - type: struct - expr: _col2 - type: struct + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false @@ -289,9 +239,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain analyze table empty_tab compute statistics for columns a,b,c,d,e POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME empty_tab)) (TOK_TABCOLNAME a b c d e)) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage @@ -299,74 +246,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - empty_tab + Map Operator Tree: TableScan alias: empty_tab Select Operator - expressions: - expr: a - type: int - expr: b - type: double - expr: c - type: string - expr: d - type: boolean - expr: e - type: binary + expressions: a (type: int), b (type: double), c (type: string), d (type: boolean), e (type: binary) outputColumnNames: a, b, c, d, e Group By Operator - aggregations: - expr: compute_stats(a, 16) - expr: compute_stats(b, 16) - expr: compute_stats(c, 16) - expr: compute_stats(d, 16) - expr: compute_stats(e, 16) - bucketGroup: false + aggregations: compute_stats(a, 16), compute_stats(b, 16), compute_stats(c, 16), compute_stats(d, 16), compute_stats(e, 16) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct - expr: _col1 - type: struct - expr: _col2 - type: struct - expr: _col3 - type: struct - expr: _col4 - type: struct + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: compute_stats(VALUE._col0) - expr: compute_stats(VALUE._col1) - expr: compute_stats(VALUE._col2) - expr: compute_stats(VALUE._col3) - expr: compute_stats(VALUE._col4) - bucketGroup: false + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Operator - expressions: - expr: _col0 - type: struct - expr: _col1 - type: struct - expr: _col2 - type: struct - expr: _col3 - type: struct - expr: _col4 - type: struct + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/combine2_hadoop20.q.out ql/src/test/results/clientpositive/combine2_hadoop20.q.out index 79097d3..5ba4f1f 100644 --- ql/src/test/results/clientpositive/combine2_hadoop20.q.out +++ ql/src/test/results/clientpositive/combine2_hadoop20.q.out @@ -100,9 +100,6 @@ POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.Fiel POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME combine2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -110,33 +107,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - combine2 + Map Operator Tree: TableScan alias: combine2 + Statistics: Num rows: 12 Data size: 14 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 14 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 12 Data size: 14 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 12 Data size: 14 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 14 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -205,7 +194,27 @@ POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.Fiel POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME combine2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL value))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + combine2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_WHERE + TOK_FUNCTION + TOK_ISNOTNULL + TOK_TABLE_OR_COL + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -214,32 +223,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - combine2 + Map Operator Tree: TableScan alias: combine2 - Statistics: - numRows: 12 dataSize: 14 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 12 Data size: 14 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator - Statistics: - numRows: 12 dataSize: 14 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 12 Data size: 14 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -583,27 +583,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -671,9 +664,6 @@ POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.Fiel POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL ds))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -681,56 +671,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: ds - type: string + expressions: ds (type: string) outputColumnNames: ds + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: ds - type: string + aggregations: count(1) + keys: ds (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/constant_prop.q.out ql/src/test/results/clientpositive/constant_prop.q.out index 67ec567..a1558e5 100644 --- ql/src/test/results/clientpositive/constant_prop.q.out +++ ql/src/test/results/clientpositive/constant_prop.q.out @@ -20,9 +20,6 @@ SELECT NAMED_STRUCT( ).F2 FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION NAMED_STRUCT (TOK_FUNCTION IF (TOK_FUNCTION ARRAY_CONTAINS (TOK_FUNCTION ARRAY 1 2) 3) "F1" "B1") 1 (TOK_FUNCTION IF (TOK_FUNCTION ARRAY_CONTAINS (TOK_FUNCTION MAP_KEYS (TOK_FUNCTION MAP "b" "x")) "b") "F2" "B2") 2)) (TOK_SELEXPR (. (TOK_FUNCTION NAMED_STRUCT (TOK_FUNCTION IF (TOK_FUNCTION ARRAY_CONTAINS (TOK_FUNCTION ARRAY 1 2) 3) "F1" "B1") 1 (TOK_FUNCTION IF (TOK_FUNCTION ARRAY_CONTAINS (TOK_FUNCTION MAP_KEYS (TOK_FUNCTION MAP "b" "x")) "b") "F2" "B2") 2) F2))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -34,13 +31,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: named_struct(if(array_contains(array(1,2), 3), 'F1', 'B1'),1,if(array_contains(map_keys(map('b':'x')), 'b'), 'F2', 'B2'),2) - type: struct - expr: named_struct(if(array_contains(array(1,2), 3), 'F1', 'B1'),1,if(array_contains(map_keys(map('b':'x')), 'b'), 'F2', 'B2'),2).F2 - type: int + expressions: named_struct(if(array_contains(array(1,2), 3), 'F1', 'B1'),1,if(array_contains(map_keys(map('b':'x')), 'b'), 'F2', 'B2'),2) (type: struct), named_struct(if(array_contains(array(1,2), 3), 'F1', 'B1'),1,if(array_contains(map_keys(map('b':'x')), 'b'), 'F2', 'B2'),2).F2 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT NAMED_STRUCT( diff --git ql/src/test/results/clientpositive/correlationoptimizer1.q.out ql/src/test/results/clientpositive/correlationoptimizer1.q.out index e033745..f78e701 100644 --- ql/src/test/results/clientpositive/correlationoptimizer1.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer1.q.out @@ -20,9 +20,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -32,34 +29,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - tmp:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -67,25 +54,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -93,48 +75,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -142,35 +108,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -209,9 +165,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -220,76 +173,58 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - tmp:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 60 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -297,35 +232,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -372,9 +297,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -383,48 +305,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -435,33 +346,25 @@ STAGE PLANS: tmp:x TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -469,35 +372,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -542,9 +435,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x LEFT SEMI JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -554,46 +444,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - tmp:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -601,25 +478,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -627,48 +499,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -676,35 +532,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -743,9 +589,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x LEFT SEMI JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -754,88 +597,67 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - tmp:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 60 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Semi Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -843,35 +665,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -916,9 +728,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x LEFT OUTER JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -928,34 +737,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - tmp:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -963,25 +762,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -989,48 +783,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1038,35 +816,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1105,9 +873,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x LEFT OUTER JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1116,76 +881,58 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - tmp:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 60 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1193,35 +940,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1266,9 +1003,6 @@ FROM (SELECT y.key AS key, count(1) AS cnt FROM src1 x LEFT OUTER JOIN src y ON (x.key = y.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1278,34 +1012,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - tmp:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1313,25 +1037,20 @@ STAGE PLANS: condition expressions: 0 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col4 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: string + expressions: _col4 (type: string) outputColumnNames: _col4 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col4 - type: string + aggregations: count(1) + keys: _col4 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1339,48 +1058,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1388,35 +1091,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1455,9 +1148,6 @@ FROM (SELECT y.key AS key, count(1) AS cnt FROM src1 x LEFT OUTER JOIN src y ON (x.key = y.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1467,34 +1157,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - tmp:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1502,25 +1182,20 @@ STAGE PLANS: condition expressions: 0 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col4 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: string + expressions: _col4 (type: string) outputColumnNames: _col4 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col4 - type: string + aggregations: count(1) + keys: _col4 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1528,48 +1203,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1577,35 +1236,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1648,9 +1297,6 @@ SELECT x.key, y.value, count(1) AS cnt FROM src1 x LEFT OUTER JOIN src y ON (x.key = y.key AND x.value = y.value) GROUP BY x.key, y.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (AND (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)) (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL y) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) value)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1659,45 +1305,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: 0 - value expressions: - expr: key - type: string - y + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: y + alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: 1 - value expressions: - expr: value - type: string + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1705,29 +1331,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col5 (type: string) outputColumnNames: _col0, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col5 - type: string + aggregations: count(1) + keys: _col0 (type: string), _col5 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1735,49 +1352,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1827,9 +1423,6 @@ SELECT x.key, y.value, count(1) AS cnt FROM src1 x LEFT OUTER JOIN src y ON (x.key = y.key AND x.value = y.value) GROUP BY x.key, y.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (AND (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)) (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL y) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) value)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1838,45 +1431,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: 0 - value expressions: - expr: key - type: string - y + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: y + alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: 1 - value expressions: - expr: value - type: string + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1884,29 +1457,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col5 (type: string) outputColumnNames: _col0, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col5 - type: string + aggregations: count(1) + keys: _col0 (type: string), _col5 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1914,49 +1478,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2014,9 +1557,6 @@ FROM (SELECT y.key AS key, count(1) AS cnt FROM src1 x RIGHT OUTER JOIN src y ON (x.key = y.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2026,34 +1566,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - tmp:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -2061,25 +1591,20 @@ STAGE PLANS: condition expressions: 0 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col4 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: string + expressions: _col4 (type: string) outputColumnNames: _col4 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col4 - type: string + aggregations: count(1) + keys: _col4 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2087,48 +1612,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2136,35 +1645,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2203,9 +1702,6 @@ FROM (SELECT y.key AS key, count(1) AS cnt FROM src1 x RIGHT OUTER JOIN src y ON (x.key = y.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2214,76 +1710,58 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - tmp:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 60 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Right Outer Join0 to 1 condition expressions: 0 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: string + expressions: _col4 (type: string) outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col4 - type: string + aggregations: count(1) + keys: _col4 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2291,35 +1769,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2364,9 +1832,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x RIGHT OUTER JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2376,34 +1841,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - tmp:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -2411,25 +1866,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2437,48 +1887,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2486,35 +1920,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2553,9 +1977,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x RIGHT OUTER JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2565,34 +1986,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - tmp:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -2600,25 +2011,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2626,48 +2032,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2675,35 +2065,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2750,9 +2130,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2762,34 +2139,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - tmp:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -2797,25 +2164,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2823,48 +2185,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2872,35 +2218,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2939,9 +2275,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2951,34 +2284,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - tmp:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -2986,25 +2309,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3012,48 +2330,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3061,35 +2363,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3130,9 +2422,6 @@ FROM (SELECT x.key AS key, x.value AS value, count(1) AS cnt FROM src1 x JOIN src y ON (x.key = y.key) GROUP BY x.key, x.value) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL x) value)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) value)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3142,36 +2431,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - tmp:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -3179,29 +2456,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3209,57 +2477,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3267,40 +2510,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3339,9 +2567,6 @@ FROM (SELECT x.key AS key, x.value AS value, count(1) AS cnt FROM src1 x JOIN src y ON (x.key = y.key) GROUP BY x.key, x.value) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL x) value)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) value)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3351,36 +2576,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - tmp:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -3388,29 +2601,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3418,57 +2622,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3476,40 +2655,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3550,9 +2714,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key = y.key AND x.value = y.value) GROUP BY x.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (AND (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)) (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL y) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3562,42 +2723,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: 0 - value expressions: - expr: key - type: string - tmp:y + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: 1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -3605,25 +2748,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3631,48 +2769,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3680,35 +2802,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3747,9 +2859,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key = y.key AND x.value = y.value) GROUP BY x.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (AND (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)) (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL y) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3759,42 +2868,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: 0 - value expressions: - expr: key - type: string - tmp:y + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: 1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -3802,25 +2893,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3828,48 +2914,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3877,35 +2947,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/correlationoptimizer10.q.out ql/src/test/results/clientpositive/correlationoptimizer10.q.out index 105f0a7..c44a746 100644 --- ql/src/test/results/clientpositive/correlationoptimizer10.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer10.q.out @@ -26,9 +26,6 @@ FROM LEFT SEMI JOIN src yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_TABREF (TOK_TABNAME src) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 @@ -39,34 +36,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - xx:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -74,25 +61,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -100,41 +82,27 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -142,47 +110,31 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME - TableScan - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - yy + Map Operator Tree: TableScan alias: yy + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -190,18 +142,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -209,27 +157,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -286,9 +226,6 @@ FROM LEFT SEMI JOIN src yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_TABREF (TOK_TABNAME src) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -297,132 +234,104 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx:x - TableScan - alias: x - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - xx:y - TableScan - alias: y - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - yy + Map Operator Tree: TableScan alias: yy + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 2 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + TableScan + alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 62 Data size: 6244 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 62 Data size: 6244 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Semi Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Mux Operator + Statistics: Num rows: 62 Data size: 6244 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Semi Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -430,27 +339,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -529,9 +430,6 @@ LEFT SEMI JOIN y.key > 20) yy ON xx.key=yy.key ORDER BY xx.key, xx.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME src1) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key)) (TOK_WHERE (AND (< (. (TOK_TABLE_OR_COL x) key) 200) (> (. (TOK_TABLE_OR_COL y) key) 20))))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -541,42 +439,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 200) and (key > 20)) - type: boolean + predicate: ((key < 200) and (key > 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - yy:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 20) and (key < 200)) - type: boolean + predicate: ((key > 20) and (key < 200)) (type: boolean) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -584,23 +470,19 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 661 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 661 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 661 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -608,35 +490,22 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - xx + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 661 Basic stats: COMPLETE Column stats: NONE TableScan alias: xx + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -644,18 +513,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 727 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 727 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -663,27 +528,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 6 Data size: 727 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 6 Data size: 727 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 6 Data size: 727 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -746,9 +603,6 @@ LEFT SEMI JOIN y.key > 20) yy ON xx.key=yy.key ORDER BY xx.key, xx.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME src1) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key)) (TOK_WHERE (AND (< (. (TOK_TABLE_OR_COL x) key) 200) (> (. (TOK_TABLE_OR_COL y) key) 20))))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -757,80 +611,58 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx - TableScan - alias: xx - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - yy:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 200) and (key > 20)) - type: boolean + predicate: ((key < 200) and (key > 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - yy:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 20) and (key < 200)) - type: boolean + predicate: ((key > 20) and (key < 200)) (type: boolean) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: xx + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 13 Data size: 1418 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 13 Data size: 1418 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Semi Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -841,32 +673,28 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 13 Data size: 1418 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Semi Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -874,27 +702,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -963,9 +783,6 @@ LEFT SEMI JOIN WHERE x.key < 200 AND x.key > 180) yy ON xx.key=yy.key ORDER BY xx.key, xx.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME src) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key)) (TOK_WHERE (AND (< (. (TOK_TABLE_OR_COL x) key) 200) (> (. (TOK_TABLE_OR_COL x) key) 180))))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -975,42 +792,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 200) and (key > 180)) - type: boolean + predicate: ((key < 200) and (key > 180)) (type: boolean) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - yy:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 200) and (key > 180)) - type: boolean + predicate: ((key < 200) and (key > 180)) (type: boolean) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1018,23 +823,19 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 661 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 661 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 661 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1042,35 +843,22 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - xx + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 661 Basic stats: COMPLETE Column stats: NONE TableScan alias: xx + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1078,18 +866,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1097,27 +881,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1192,9 +968,6 @@ LEFT SEMI JOIN WHERE x.key < 200 AND x.key > 180) yy ON xx.key=yy.key ORDER BY xx.key, xx.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME src) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key)) (TOK_WHERE (AND (< (. (TOK_TABLE_OR_COL x) key) 200) (> (. (TOK_TABLE_OR_COL x) key) 180))))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1203,80 +976,58 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx - TableScan - alias: xx - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - yy:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 200) and (key > 180)) - type: boolean + predicate: ((key < 200) and (key > 180)) (type: boolean) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - yy:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 200) and (key > 180)) - type: boolean + predicate: ((key < 200) and (key > 180)) (type: boolean) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: xx + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 41 Data size: 7014 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 41 Data size: 7014 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Semi Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1287,32 +1038,28 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 41 Data size: 7014 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Semi Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1320,27 +1067,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/correlationoptimizer11.q.out ql/src/test/results/clientpositive/correlationoptimizer11.q.out index b15a3a3..5d81e46 100644 --- ql/src/test/results/clientpositive/correlationoptimizer11.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer11.q.out @@ -61,9 +61,6 @@ POSTHOOK: Lineage: part_table PARTITION(partitionid=1).key SIMPLE [(src)src.Fiel POSTHOOK: Lineage: part_table PARTITION(partitionid=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: part_table PARTITION(partitionid=2).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: part_table PARTITION(partitionid=2).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME part_table) x) (TOK_TABREF (TOK_TABNAME part_table) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (AND (= (. (TOK_TABLE_OR_COL x) partitionId) 1) (= (. (TOK_TABLE_OR_COL y) partitionId) 2))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -72,34 +69,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: x + Statistics: Num rows: 100 Data size: 1070 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 100 Data size: 1070 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -107,25 +94,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 110 Data size: 1177 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 110 Data size: 1177 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 110 Data size: 1177 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -133,41 +115,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 110 Data size: 1177 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 588 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 588 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 55 Data size: 588 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -222,9 +191,6 @@ POSTHOOK: Lineage: part_table PARTITION(partitionid=1).key SIMPLE [(src)src.Fiel POSTHOOK: Lineage: part_table PARTITION(partitionid=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: part_table PARTITION(partitionid=2).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: part_table PARTITION(partitionid=2).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME part_table) x) (TOK_TABREF (TOK_TABNAME part_table) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (AND (= (. (TOK_TABLE_OR_COL x) partitionId) 1) (= (. (TOK_TABLE_OR_COL y) partitionId) 2))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -232,69 +198,54 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: x + Statistics: Num rows: 100 Data size: 1070 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 100 Data size: 1070 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 125 Data size: 1261 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -357,9 +308,6 @@ POSTHOOK: Lineage: part_table PARTITION(partitionid=1).key SIMPLE [(src)src.Fiel POSTHOOK: Lineage: part_table PARTITION(partitionid=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: part_table PARTITION(partitionid=2).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: part_table PARTITION(partitionid=2).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME part_table) x) (TOK_TABREF (TOK_TABNAME part_table) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (AND (= (. (TOK_TABLE_OR_COL x) partitionId) 2) (= (. (TOK_TABLE_OR_COL y) partitionId) 2))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -368,34 +316,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -403,25 +341,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -429,41 +362,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 101 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 13 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -529,9 +449,6 @@ POSTHOOK: Lineage: part_table PARTITION(partitionid=1).key SIMPLE [(src)src.Fiel POSTHOOK: Lineage: part_table PARTITION(partitionid=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: part_table PARTITION(partitionid=2).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: part_table PARTITION(partitionid=2).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME part_table) x) (TOK_TABREF (TOK_TABNAME part_table) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (AND (= (. (TOK_TABLE_OR_COL x) partitionId) 2) (= (. (TOK_TABLE_OR_COL y) partitionId) 2))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -539,69 +456,54 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 50 Data size: 382 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/correlationoptimizer12.q.out ql/src/test/results/clientpositive/correlationoptimizer12.q.out index c515b88..1d74243 100644 --- ql/src/test/results/clientpositive/correlationoptimizer12.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer12.q.out @@ -14,9 +14,6 @@ JOIN (SELECT y.key as key, count(y.value) OVER (PARTITION BY y.key) AS cnt FROM src1 y) yy ON (xx.key=yy.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL x) value) (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (. (TOK_TABLE_OR_COL x) key))))) cnt)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL y) value) (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (. (TOK_TABLE_OR_COL y) key))))) cnt)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -26,39 +23,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: key - type: string + key expressions: key (type: string), key (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - tag: -1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE PTF Operator + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _wcol0 - type: bigint + expressions: _col0 (type: string), _wcol0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -66,39 +51,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -106,22 +73,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -129,39 +89,27 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: key - type: string + key expressions: key (type: string), key (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - tag: -1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE PTF Operator + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _wcol0 - type: bigint + expressions: _col0 (type: string), _wcol0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/correlationoptimizer13.q.out ql/src/test/results/clientpositive/correlationoptimizer13.q.out index 19327ce..448d4d2 100644 --- ql/src/test/results/clientpositive/correlationoptimizer13.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer13.q.out @@ -47,9 +47,6 @@ POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) c1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) c3) key2) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) c1) 120)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) c3) (. (TOK_TABLE_OR_COL x) c1)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) c1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) c3) key2) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) c2) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) c3) (. (TOK_TABLE_OR_COL x1) c1)))) yy) (AND (= (. (TOK_TABLE_OR_COL xx) key1) (. (TOK_TABLE_OR_COL yy) key1)) (== (. (TOK_TABLE_OR_COL xx) key2) (. (TOK_TABLE_OR_COL yy) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key2)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key2)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -60,72 +57,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:x1 + Map Operator Tree: TableScan alias: x1 + Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (c2 > 100) - type: boolean + predicate: (c2 > 100) (type: boolean) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: c3 - type: string - expr: c1 - type: int + expressions: c3 (type: string), c1 (type: int) outputColumnNames: c3, c1 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: c3 - type: string - expr: c1 - type: int + aggregations: count(1) + keys: c3 (type: string), c1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: int + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: int - expr: _col0 - type: string - expr: _col2 - type: bigint + expressions: _col1 (type: int), _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -133,51 +100,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: string - tag: 1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -185,26 +122,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col3 - type: int - expr: _col4 - type: string - expr: _col2 - type: bigint - expr: _col5 - type: bigint + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -212,43 +137,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - expr: _col4 - type: bigint - expr: _col5 - type: bigint + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: bigint) sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - expr: _col4 - type: bigint - expr: _col5 - type: bigint + Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -256,72 +157,42 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (c1 < 120) - type: boolean + predicate: (c1 < 120) (type: boolean) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: c3 - type: string - expr: c1 - type: int + expressions: c3 (type: string), c1 (type: int) outputColumnNames: c3, c1 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: c3 - type: string - expr: c1 - type: int + aggregations: count(1) + keys: c3 (type: string), c1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: int + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: int - expr: _col0 - type: string - expr: _col2 - type: bigint + expressions: _col1 (type: int), _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/correlationoptimizer14.q.out ql/src/test/results/clientpositive/correlationoptimizer14.q.out index ca47499..f2d0877 100644 --- ql/src/test/results/clientpositive/correlationoptimizer14.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer14.q.out @@ -32,9 +32,6 @@ JOIN (SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key) yy ON (xx.key=yy.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value)) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value)) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -44,33 +41,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -78,39 +66,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -118,22 +88,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -141,33 +104,24 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -195,9 +149,6 @@ JOIN (SELECT y.key as key, y.value as value FROM src1 y SORT BY key) yy ON (xx.key=yy.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -207,33 +158,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -241,39 +183,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -281,22 +205,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -304,33 +221,24 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -358,9 +266,6 @@ JOIN (SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy ON (xx.key=yy.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value)) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value)) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -370,36 +275,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -407,39 +301,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -447,22 +323,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -470,36 +339,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -582,9 +440,6 @@ JOIN (SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key) yy ON (xx.key=yy.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value)) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value)) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -592,109 +447,78 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - yy:y + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan - alias: y + alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Extract + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 60 Data size: 12056 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Extract + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 60 Data size: 12056 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -781,9 +605,6 @@ JOIN (SELECT y.key as key, y.value as value FROM src1 y DISTRIBUTE BY key SORT BY key DESC) yy ON (xx.key=yy.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value)) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL key))))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value)) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL key))))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -793,36 +614,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: - - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -830,39 +640,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -870,22 +662,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -893,36 +678,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: - - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -952,9 +726,6 @@ JOIN (SELECT y.key as key, y.value as value FROM src1 y ORDER BY key) yy ON (xx.key=yy.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -964,33 +735,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -998,39 +760,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1038,22 +782,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1061,33 +798,24 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1115,9 +843,6 @@ JOIN (SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy ON (xx.key=yy.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -1127,36 +852,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1164,39 +878,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1204,22 +900,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1227,36 +916,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1339,9 +1017,6 @@ JOIN (SELECT y.key as key, y.value as value FROM src1 y Cluster BY key) yy ON (xx.key=yy.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1349,109 +1024,78 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - yy:y + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan - alias: y + alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Extract + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 60 Data size: 12056 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Extract + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 60 Data size: 12056 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1538,9 +1182,6 @@ JOIN (SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy ON (xx.key=yy.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) value)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -1550,56 +1191,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1607,39 +1231,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1647,22 +1253,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1670,36 +1269,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1782,9 +1370,6 @@ JOIN (SELECT y.key as key, count(*) as value FROM src1 y GROUP BY y.key ORDER BY key) yy ON (xx.key=yy.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) value)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1792,129 +1377,92 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx:x - TableScan - alias: x - Select Operator - expressions: - expr: key - type: string - expr: value - type: string - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - yy:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan + alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 31 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Extract + Statistics: Num rows: 31 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 46 Data size: 8944 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 2916 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 2916 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 46 Data size: 8944 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/correlationoptimizer15.q.out ql/src/test/results/clientpositive/correlationoptimizer15.q.out index 3341e93..12914cc 100644 --- ql/src/test/results/clientpositive/correlationoptimizer15.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer15.q.out @@ -18,9 +18,6 @@ FROM JOIN src yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_TABREF (TOK_TABNAME src) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 @@ -31,34 +28,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - xx:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -66,25 +53,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -92,41 +74,27 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -134,38 +102,23 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: yy + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - yy + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: yy Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -173,20 +126,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -194,31 +141,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -297,9 +232,6 @@ FROM JOIN src yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_TABREF (TOK_TABNAME src) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -308,127 +240,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan - alias: x + alias: yy + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - xx:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: y + alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - yy + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: yy + alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 62 Data size: 6244 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 62 Data size: 6244 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Mux Operator + Statistics: Num rows: 62 Data size: 6244 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -436,31 +337,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/correlationoptimizer2.q.out ql/src/test/results/clientpositive/correlationoptimizer2.q.out index 9adb4aa..c00b609 100644 --- ql/src/test/results/clientpositive/correlationoptimizer2.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer2.q.out @@ -24,9 +24,6 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key)) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL x) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL y) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) cnt) cnt1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) cnt) cnt2)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt2))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -37,58 +34,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:b:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -96,39 +74,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp:$INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - tmp:$INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -136,31 +96,19 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -168,45 +116,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: sum(VALUE._col3) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -214,58 +142,39 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - tmp:a:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -308,9 +217,6 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key)) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL x) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL y) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) cnt) cnt1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) cnt) cnt2)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt2))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -319,171 +225,114 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:a:x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint - tmp:b:y + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan - alias: y + alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -491,45 +340,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: sum(VALUE._col3) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -574,9 +403,6 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 LEFT OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key)) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL x) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL y) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) cnt) cnt1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) cnt) cnt2)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt2))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -587,58 +413,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:b:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -646,39 +453,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp:$INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - tmp:$INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -686,31 +475,19 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -718,45 +495,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: sum(VALUE._col3) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -764,58 +521,39 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - tmp:a:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -858,9 +596,6 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 LEFT OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key)) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL x) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL y) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) cnt) cnt1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) cnt) cnt2)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt2))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -869,171 +604,114 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:a:x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint - tmp:b:y + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan - alias: y + alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1041,45 +719,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: sum(VALUE._col3) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1124,9 +782,6 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 RIGHT OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key)) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL x) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL y) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) cnt) cnt1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) cnt) cnt2)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt2))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -1137,58 +792,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:b:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1196,39 +832,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp:$INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - tmp:$INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -1236,31 +854,19 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1268,45 +874,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: sum(VALUE._col3) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1314,58 +900,39 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - tmp:a:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1408,9 +975,6 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 RIGHT OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key)) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL x) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL y) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) cnt) cnt1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) cnt) cnt2)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt2))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1419,171 +983,114 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:a:x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint - tmp:b:y + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan - alias: y + alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Right Outer Join0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Right Outer Join0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1591,45 +1098,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: sum(VALUE._col3) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1674,9 +1161,6 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key)) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL x) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL y) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) cnt) cnt1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) cnt) cnt2)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt2))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -1687,58 +1171,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:b:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1746,39 +1211,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp:$INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - tmp:$INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -1786,31 +1233,19 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1818,45 +1253,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: sum(VALUE._col3) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1864,58 +1279,39 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - tmp:a:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1958,9 +1354,6 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key)) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL x) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL y) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) cnt) cnt1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) cnt) cnt2)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt2))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1969,171 +1362,114 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:a:x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint - tmp:b:y + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan - alias: y + alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Outer Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Outer Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2141,45 +1477,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: sum(VALUE._col3) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2224,9 +1540,6 @@ FROM (SELECT a.key AS key, count(1) AS cnt ON (a.key = b.key) GROUP BY a.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL x) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL y) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-5 @@ -2238,56 +1551,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:b:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2295,32 +1591,20 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp:$INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - tmp:$INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -2328,25 +1612,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2354,48 +1633,32 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 15 Data size: 3085 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1439 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1439 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2403,35 +1666,25 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2439,56 +1692,39 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - tmp:a:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2541,9 +1777,6 @@ FROM (SELECT a.key AS key, count(1) AS cnt ON (a.key = b.key) GROUP BY a.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL x) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL y) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2553,155 +1786,116 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:a:x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint - tmp:b:y + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan - alias: y + alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Outer Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 15 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Outer Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2709,48 +1903,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2758,35 +1936,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2839,9 +2007,6 @@ FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b ON (a.key = b.key)) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) val)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) z)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL z) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL z) key)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) val) cnt1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) cnt) cnt2)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt2))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -2852,58 +2017,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:b:z + Map Operator Tree: TableScan alias: z + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2911,39 +2057,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp:$INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - tmp:$INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -2951,31 +2079,19 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2983,45 +2099,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: sum(VALUE._col3) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3029,36 +2125,24 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - tmp:a:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - tmp:a:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -3066,18 +2150,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3120,9 +2200,6 @@ FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b ON (a.key = b.key)) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) val)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) z)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) key) key) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL z) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL z) key)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) val) cnt1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) cnt) cnt2)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL cnt2))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3131,168 +2208,114 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:a:x - TableScan - alias: x - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - tmp:a:y - TableScan - alias: y - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - tmp:b:z + Map Operator Tree: TableScan alias: z + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 2 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan + alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + TableScan + alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 60 Data size: 6244 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 30 Data size: 3122 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 3122 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 3122 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 30 Data size: 3122 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3300,45 +2323,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: sum(VALUE._col3) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/correlationoptimizer3.q.out ql/src/test/results/clientpositive/correlationoptimizer3.q.out index 1f53509..19a0edc 100644 --- ql/src/test/results/clientpositive/correlationoptimizer3.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer3.q.out @@ -26,9 +26,6 @@ FROM (SELECT b.key AS key, b.cnt AS cnt, d.value AS value JOIN (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) d ON b.key = d.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) b) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value))))) d) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) cnt) cnt) (TOK_SELEXPR (. (TOK_TABLE_OR_COL d) value) value)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) value))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -40,34 +37,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:b:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - tmp:b:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -75,25 +62,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -101,41 +83,27 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -143,37 +111,21 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - tmp:$INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - tmp:$INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -181,28 +133,19 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -210,40 +153,25 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -251,36 +179,24 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - tmp:d:x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - tmp:d:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -288,18 +204,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -342,9 +254,6 @@ FROM (SELECT b.key AS key, b.cnt AS cnt, d.value AS value JOIN (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) d ON b.key = d.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) b) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value))))) d) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) cnt) cnt) (TOK_SELEXPR (. (TOK_TABLE_OR_COL d) value) value)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) value))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -353,124 +262,89 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:b:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - tmp:b:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: y + alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - tmp:d:x + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: x + alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: value - type: string - tmp:d:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 3 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 119 Data size: 12056 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -481,44 +355,33 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -526,40 +389,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -604,9 +452,6 @@ FROM (SELECT b.key AS key, b.cnt AS cnt, d.value AS value JOIN (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) d ON b.key = d.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) b) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value))))) d) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) cnt) cnt) (TOK_SELEXPR (. (TOK_TABLE_OR_COL d) value) value)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) value))))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -615,82 +460,61 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp:b:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint - tmp:d:y + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1 - Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -704,89 +528,70 @@ STAGE PLANS: tmp:b:x TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE tmp:d:x TableScan alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 189 Data size: 19179 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Mux Operator + Statistics: Num rows: 189 Data size: 19179 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -794,40 +599,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -870,9 +660,6 @@ FROM (SELECT d.key AS key, d.cnt AS cnt, b.value as value JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key = y.key) group by x.key) d ON b.key = d.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value))))) b) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) d) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL d) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL d) cnt) cnt) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) value)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) value))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-6 @@ -884,36 +671,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:b:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - tmp:b:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -921,18 +696,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -940,37 +711,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp:$INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: string - tmp:$INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -978,28 +733,19 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: bigint - expr: _col1 - type: string + expressions: _col2 (type: string), _col3 (type: bigint), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1007,40 +753,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1048,34 +779,24 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - tmp:d:x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - tmp:d:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1083,25 +804,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1109,41 +825,27 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1186,9 +888,6 @@ FROM (SELECT d.key AS key, d.cnt AS cnt, b.value as value JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key = y.key) group by x.key) d ON b.key = d.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value))))) b) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) d) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL d) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL d) cnt) cnt) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) value)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) value))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1197,109 +896,77 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:b:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - tmp:b:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: y + alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - tmp:d:x + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: x + alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - tmp:d:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 3 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 119 Data size: 12056 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: bigint - expr: _col1 - type: string + expressions: _col2 (type: string), _col3 (type: bigint), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1310,59 +977,45 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: bigint - expr: _col1 - type: string + expressions: _col2 (type: string), _col3 (type: bigint), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1370,40 +1023,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1448,9 +1086,6 @@ FROM (SELECT d.key AS key, d.cnt AS cnt, b.value as value JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key = y.key) group by x.key) d ON b.key = d.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value))))) b) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) d) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL d) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL d) cnt) cnt) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) value)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) value))))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -1459,82 +1094,61 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp:b:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1 - Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: string - tmp:d:y + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -1548,89 +1162,70 @@ STAGE PLANS: tmp:b:x TableScan alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE tmp:d:x TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 189 Data size: 19179 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: bigint - expr: _col1 - type: string + expressions: _col2 (type: string), _col3 (type: bigint), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 189 Data size: 19179 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: bigint - expr: _col1 - type: string + expressions: _col2 (type: string), _col3 (type: bigint), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1638,40 +1233,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/correlationoptimizer4.q.out ql/src/test/results/clientpositive/correlationoptimizer4.q.out index 90656a6..5d31e4e 100644 --- ql/src/test/results/clientpositive/correlationoptimizer4.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer4.q.out @@ -51,9 +51,6 @@ FROM (SELECT y.key AS key, count(1) AS cnt FROM T2 x JOIN T1 y ON (x.key = y.key) JOIN T3 z ON (y.key = z.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME T2) x) (TOK_TABREF (TOK_TABNAME T1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME T3) z) (= (. (TOK_TABLE_OR_COL y) key) (. (TOK_TABLE_OR_COL z) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -63,46 +60,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - tmp:y + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: z + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - value expressions: - expr: key - type: int - tmp:z + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: z + alias: y + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 2 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Join Operator condition map: @@ -112,25 +95,20 @@ STAGE PLANS: 0 1 {VALUE._col0} 2 - handleSkewJoin: false outputColumnNames: _col4 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: int + expressions: _col4 (type: int) outputColumnNames: _col4 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col4 - type: int + aggregations: count(1) + keys: _col4 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -138,48 +116,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -187,35 +149,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -256,9 +208,6 @@ FROM (SELECT y.key AS key, count(1) AS cnt FROM T2 x JOIN T1 y ON (x.key = y.key) JOIN T3 z ON (y.key = z.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME T2) x) (TOK_TABREF (TOK_TABNAME T1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME T3) z) (= (. (TOK_TABLE_OR_COL y) key) (. (TOK_TABLE_OR_COL z) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -267,48 +216,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - tmp:y + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: z + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - tmp:z + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: z + alias: y + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 2 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 21 Data size: 90 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 @@ -317,40 +253,31 @@ STAGE PLANS: 0 1 {VALUE._col0} 2 - handleSkewJoin: false outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: int + expressions: _col4 (type: int) outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col4 - type: int + aggregations: count(1) + keys: _col4 (type: int) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -358,35 +285,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -429,9 +346,6 @@ FROM (SELECT y.key AS key, count(1) AS cnt FROM T2 x JOIN T1 y ON (x.key = y.key) JOIN T3 z ON (y.key = z.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME T2) x) (TOK_TABREF (TOK_TABNAME T1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME T3) z) (= (. (TOK_TABLE_OR_COL y) key) (. (TOK_TABLE_OR_COL z) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -440,10 +354,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -452,39 +366,28 @@ STAGE PLANS: 0 1 {key} 2 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col4 - Position of Big Table: 0 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: int + expressions: _col4 (type: int) outputColumnNames: _col4 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col4 - type: int + aggregations: count(1) + keys: _col4 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -498,36 +401,29 @@ STAGE PLANS: tmp:y TableScan alias: y + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE tmp:z TableScan alias: z + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -535,35 +431,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -608,9 +494,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM T2 x LEFT OUTER JOIN T1 y ON (x.key = y.key) LEFT OUTER JOIN T3 z ON (y.key = z.key) GROUP BY x.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME T2) x) (TOK_TABREF (TOK_TABNAME T1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME T3) z) (= (. (TOK_TABLE_OR_COL y) key) (. (TOK_TABLE_OR_COL z) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -620,46 +503,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - tmp:y + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) TableScan - alias: y + alias: z + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - tmp:z + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: z + alias: y + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 2 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -669,25 +538,20 @@ STAGE PLANS: 0 {VALUE._col0} 1 2 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count(1) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -695,48 +559,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -744,35 +592,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -813,9 +651,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM T2 x LEFT OUTER JOIN T1 y ON (x.key = y.key) LEFT OUTER JOIN T3 z ON (y.key = z.key) GROUP BY x.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME T2) x) (TOK_TABREF (TOK_TABNAME T1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME T3) z) (= (. (TOK_TABLE_OR_COL y) key) (. (TOK_TABLE_OR_COL z) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -824,48 +659,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - tmp:y + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) TableScan - alias: y + alias: z + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - tmp:z + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: z + alias: y + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 2 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 21 Data size: 90 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Outer Join0 to 1 @@ -874,40 +696,31 @@ STAGE PLANS: 0 {VALUE._col0} 1 2 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count(1) + keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -915,35 +728,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -990,9 +793,6 @@ FROM (SELECT y.key AS key, count(1) AS cnt FROM T2 x LEFT OUTER JOIN T1 y ON (x.key = y.key) LEFT OUTER JOIN T3 z ON (y.key = z.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME T2) x) (TOK_TABREF (TOK_TABNAME T1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME T3) z) (= (. (TOK_TABLE_OR_COL y) key) (. (TOK_TABLE_OR_COL z) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1002,46 +802,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - tmp:y + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: z + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - value expressions: - expr: key - type: int - tmp:z + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: z + alias: y + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 2 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Join Operator condition map: @@ -1051,25 +837,20 @@ STAGE PLANS: 0 1 {VALUE._col0} 2 - handleSkewJoin: false outputColumnNames: _col4 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: int + expressions: _col4 (type: int) outputColumnNames: _col4 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col4 - type: int + aggregations: count(1) + keys: _col4 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1077,48 +858,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1126,35 +891,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1199,9 +954,6 @@ FROM (SELECT z.key AS key, count(1) AS cnt FROM T2 x RIGHT OUTER JOIN T1 y ON (x.key = y.key) RIGHT OUTER JOIN T3 z ON (y.key = z.key) GROUP BY z.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME T2) x) (TOK_TABREF (TOK_TABNAME T1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME T3) z) (= (. (TOK_TABLE_OR_COL y) key) (. (TOK_TABLE_OR_COL z) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL z) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1211,46 +963,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - tmp:y + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: z + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - tmp:z + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) TableScan - alias: z + alias: y + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 2 - value expressions: - expr: key - type: int + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1260,25 +998,20 @@ STAGE PLANS: 0 1 2 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col8 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col8 - type: int + expressions: _col8 (type: int) outputColumnNames: _col8 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col8 - type: int + aggregations: count(1) + keys: _col8 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1286,48 +1019,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1335,35 +1052,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1404,9 +1111,6 @@ FROM (SELECT z.key AS key, count(1) AS cnt FROM T2 x RIGHT OUTER JOIN T1 y ON (x.key = y.key) RIGHT OUTER JOIN T3 z ON (y.key = z.key) GROUP BY z.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME T2) x) (TOK_TABREF (TOK_TABNAME T1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME T3) z) (= (. (TOK_TABLE_OR_COL y) key) (. (TOK_TABLE_OR_COL z) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL z) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1415,48 +1119,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 2 - tmp:y + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: z + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - tmp:z + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) TableScan - alias: z + alias: y + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 21 Data size: 90 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Right Outer Join0 to 1 @@ -1465,40 +1156,31 @@ STAGE PLANS: 0 1 2 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col8 - type: int + expressions: _col8 (type: int) outputColumnNames: _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col8 - type: int + aggregations: count(1) + keys: _col8 (type: int) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1506,35 +1188,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1581,9 +1253,6 @@ FROM (SELECT y.key AS key, count(1) AS cnt FROM T2 x RIGHT OUTER JOIN T1 y ON (x.key = y.key) RIGHT OUTER JOIN T3 z ON (y.key = z.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME T2) x) (TOK_TABREF (TOK_TABNAME T1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME T3) z) (= (. (TOK_TABLE_OR_COL y) key) (. (TOK_TABLE_OR_COL z) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1593,46 +1262,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - tmp:y + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: z + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - value expressions: - expr: key - type: int - tmp:z + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: z + alias: y + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 2 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Join Operator condition map: @@ -1642,25 +1297,20 @@ STAGE PLANS: 0 1 {VALUE._col0} 2 - handleSkewJoin: false outputColumnNames: _col4 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: int + expressions: _col4 (type: int) outputColumnNames: _col4 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col4 - type: int + aggregations: count(1) + keys: _col4 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1668,48 +1318,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1717,35 +1351,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1790,9 +1414,6 @@ FROM (SELECT y.key AS key, count(1) AS cnt FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME T2) x) (TOK_TABREF (TOK_TABNAME T1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME T3) z) (= (. (TOK_TABLE_OR_COL y) key) (. (TOK_TABLE_OR_COL z) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1802,46 +1423,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - tmp:y + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: z + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - value expressions: - expr: key - type: int - tmp:z + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: z + alias: y + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 2 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Join Operator condition map: @@ -1851,25 +1458,20 @@ STAGE PLANS: 0 1 {VALUE._col0} 2 - handleSkewJoin: false outputColumnNames: _col4 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: int + expressions: _col4 (type: int) outputColumnNames: _col4 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col4 - type: int + aggregations: count(1) + keys: _col4 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1877,48 +1479,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1926,35 +1512,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1995,9 +1571,6 @@ FROM (SELECT y.key AS key, count(1) AS cnt FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME T2) x) (TOK_TABREF (TOK_TABNAME T1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME T3) z) (= (. (TOK_TABLE_OR_COL y) key) (. (TOK_TABLE_OR_COL z) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL tmp) cnt))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2007,46 +1580,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - tmp:y + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: z + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - value expressions: - expr: key - type: int - tmp:z + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: z + alias: y + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 2 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Join Operator condition map: @@ -2056,25 +1615,20 @@ STAGE PLANS: 0 1 {VALUE._col0} 2 - handleSkewJoin: false outputColumnNames: _col4 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: int + expressions: _col4 (type: int) outputColumnNames: _col4 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col4 - type: int + aggregations: count(1) + keys: _col4 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2082,48 +1636,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2131,35 +1669,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/correlationoptimizer5.q.out ql/src/test/results/clientpositive/correlationoptimizer5.q.out index f1348eb..adff359 100644 --- ql/src/test/results/clientpositive/correlationoptimizer5.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer5.q.out @@ -79,9 +79,6 @@ JOIN (SELECT m.key, n.val FROM T3 m JOIN T4 n ON (m.key = n.key)) d ON b.key = d.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) x) (TOK_TABREF (TOK_TABNAME T2) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) val))))) b) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T3) m) (TOK_TABREF (TOK_TABNAME T4) n) (= (. (TOK_TABLE_OR_COL m) key) (. (TOK_TABLE_OR_COL n) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL m) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL n) val))))) d) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_co1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL d) val))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -92,37 +89,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - d:m + Map Operator Tree: TableScan alias: m + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - d:n + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) TableScan alias: n + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - value expressions: - expr: val - type: string + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -130,18 +115,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col5 + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -149,35 +130,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - value expressions: - expr: _col1 - type: string - $INTNAME1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col0 - type: int + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -185,18 +152,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col3 + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col3 - type: string + expressions: _col0 (type: int), _col3 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -218,34 +182,24 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - b:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - b:y + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) TableScan alias: y + Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -253,16 +207,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -316,9 +268,6 @@ ON b.key = d.key POSTHOOK: type: QUERY POSTHOOK: Lineage: dest_co1.key EXPRESSION [(t1)x.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: dest_co1.val SIMPLE [(t4)n.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) x) (TOK_TABREF (TOK_TABNAME T2) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) val))))) b) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T3) m) (TOK_TABREF (TOK_TABNAME T4) n) (= (. (TOK_TABLE_OR_COL m) key) (. (TOK_TABLE_OR_COL n) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL m) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL n) val))))) d) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_co2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL d) val))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -327,98 +276,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b:x + Map Operator Tree: TableScan - alias: x + alias: m + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - b:y + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) TableScan - alias: y + alias: x + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - d:m + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) TableScan - alias: m + alias: n + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 2 - value expressions: - expr: key - type: int - d:n + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) TableScan - alias: n + alias: y + Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 3 - value expressions: - expr: val - type: string + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 2956 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col3 - type: string + expressions: _col0 (type: int), _col3 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -430,34 +355,29 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col3 - type: string + expressions: _col0 (type: int), _col3 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -531,9 +451,6 @@ POSTHOOK: Lineage: dest_co1.key EXPRESSION [(t1)x.FieldSchema(name:key, type:int POSTHOOK: Lineage: dest_co1.val SIMPLE [(t4)n.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: dest_co2.key EXPRESSION [(t1)x.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: dest_co2.val SIMPLE [(t4)n.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) x) (TOK_TABREF (TOK_TABNAME T2) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) val))))) b) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T3) m) (TOK_TABREF (TOK_TABNAME T4) n) (= (. (TOK_TABLE_OR_COL m) key) (. (TOK_TABLE_OR_COL n) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL m) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL n) val))))) d) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_co3))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL d) val))))) - STAGE DEPENDENCIES: Stage-10 is a root stage Stage-9 depends on stages: Stage-10, Stage-11 , consists of Stage-7, Stage-8, Stage-2 @@ -547,32 +464,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: - d:n + Map Operator Tree: TableScan alias: n + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 {val} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col5 - Position of Big Table: 1 + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -587,14 +499,14 @@ STAGE PLANS: d:m TableScan alias: m + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: NONE Stage: Stage-9 Conditional Operator Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: - $INTNAME1 + Map Operator Tree: TableScan Map Join Operator condition map: @@ -602,22 +514,15 @@ STAGE PLANS: condition expressions: 0 {_col0} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col3 - type: string + expressions: _col0 (type: int), _col3 (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -648,8 +553,7 @@ STAGE PLANS: Stage: Stage-8 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -657,22 +561,15 @@ STAGE PLANS: condition expressions: 0 {_col0} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col3 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col3 - type: string + expressions: _col0 (type: int), _col3 (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -690,35 +587,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - value expressions: - expr: _col1 - type: string - $INTNAME1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col0 - type: int + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -726,18 +609,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col3 + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col3 - type: string + expressions: _col0 (type: int), _col3 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -746,30 +626,27 @@ STAGE PLANS: Stage: Stage-11 Map Reduce - Alias -> Map Operator Tree: - b:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -784,6 +661,7 @@ STAGE PLANS: b:y TableScan alias: y + Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: INSERT OVERWRITE TABLE dest_co3 SELECT b.key, d.val diff --git ql/src/test/results/clientpositive/correlationoptimizer6.q.out ql/src/test/results/clientpositive/correlationoptimizer6.q.out index 907604b..232693d 100644 --- ql/src/test/results/clientpositive/correlationoptimizer6.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer6.q.out @@ -20,9 +20,6 @@ JOIN (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -35,34 +32,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - yy:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -70,25 +57,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -96,41 +78,27 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -138,39 +106,21 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -178,22 +128,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -201,35 +143,19 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -237,34 +163,24 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - xx:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -272,25 +188,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -298,41 +209,27 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -393,9 +290,6 @@ JOIN (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -404,116 +298,84 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - xx:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - yy:x + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - yy:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 3 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 120 Data size: 12056 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -524,53 +386,40 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -578,35 +427,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -669,9 +502,6 @@ JOIN (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -680,89 +510,67 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint - yy:x + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -776,93 +584,70 @@ STAGE PLANS: xx:y TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE yy:y TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 65 Data size: 6630 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 32 Data size: 3264 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 32 Data size: 3264 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 64 Data size: 6528 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 32 Data size: 3264 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 32 Data size: 3264 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 64 Data size: 6528 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -870,35 +655,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -963,9 +732,6 @@ JOIN (SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -975,56 +741,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1032,38 +781,23 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - xx + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan alias: xx + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1071,20 +805,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col4, _col5 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string - expr: _col5 - type: bigint + expressions: _col0 (type: string), _col4 (type: string), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1092,31 +820,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1173,9 +889,6 @@ JOIN (SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1184,115 +897,84 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx - TableScan - alias: xx - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - yy:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan + alias: xx + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 60 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 90 Data size: 9042 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string - expr: _col5 - type: bigint + expressions: _col0 (type: string), _col4 (type: string), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 3014 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 90 Data size: 9042 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string - expr: _col5 - type: bigint + expressions: _col0 (type: string), _col4 (type: string), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1300,31 +982,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1387,9 +1057,6 @@ JOIN (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1400,34 +1067,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - yy:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1435,25 +1092,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1461,41 +1113,27 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1503,38 +1141,23 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - xx + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan alias: xx + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1542,20 +1165,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col4, _col5 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string - expr: _col5 - type: bigint + expressions: _col0 (type: string), _col4 (type: string), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1563,31 +1180,19 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1644,9 +1249,6 @@ JOIN (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1655,72 +1257,52 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx + Map Operator Tree: TableScan - alias: xx + alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - yy:x + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: x + alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - yy:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: xx + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 118 Data size: 11840 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 118 Data size: 11840 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string - expr: _col5 - type: bigint + expressions: _col0 (type: string), _col4 (type: string), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1731,51 +1313,40 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 118 Data size: 11840 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string - expr: _col5 - type: bigint + expressions: _col0 (type: string), _col4 (type: string), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1783,31 +1354,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1870,9 +1429,6 @@ FROM JOIN src yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_TABREF (TOK_TABNAME src) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 @@ -1883,34 +1439,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - xx:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1918,25 +1464,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1944,41 +1485,27 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1986,38 +1513,23 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: yy + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - yy + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: yy Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -2025,20 +1537,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2046,31 +1552,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2149,9 +1643,6 @@ FROM JOIN src yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_TABREF (TOK_TABNAME src) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2160,127 +1651,96 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan - alias: x + alias: yy + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - xx:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: y + alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - yy + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: yy + alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 62 Data size: 6244 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 62 Data size: 6244 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Mux Operator + Statistics: Num rows: 62 Data size: 6244 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2288,31 +1748,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2401,9 +1849,6 @@ JOIN ON zz.key=yy.key ORDER BY xx.key, yy.key, yy.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) xx) (TOK_TABREF (TOK_TABNAME src) zz) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL zz) key))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) yy) (= (. (TOK_TABLE_OR_COL zz) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 @@ -2414,34 +1859,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - yy:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - yy:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -2449,25 +1884,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2475,41 +1905,27 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2517,50 +1933,31 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: zz + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 2 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - xx + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan - alias: xx Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - zz + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan - alias: zz + alias: xx + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -2570,20 +1967,14 @@ STAGE PLANS: 0 {VALUE._col0} 1 2 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col8, _col9 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col8 - type: string - expr: _col9 - type: bigint + expressions: _col0 (type: string), _col8 (type: string), _col9 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2591,31 +1982,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2708,9 +2087,6 @@ JOIN ON zz.key=yy.key ORDER BY xx.key, yy.key, yy.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) xx) (TOK_TABREF (TOK_TABNAME src) zz) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL zz) key))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) yy) (= (. (TOK_TABLE_OR_COL zz) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2719,64 +2095,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx + Map Operator Tree: TableScan - alias: xx + alias: zz + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - yy:x + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - yy:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 3 - zz + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan - alias: zz + alias: xx + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 176 Data size: 17652 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 352 Data size: 35304 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 @@ -2785,20 +2143,14 @@ STAGE PLANS: 0 {VALUE._col0} 1 2 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col8 - type: string - expr: _col9 - type: bigint + expressions: _col0 (type: string), _col8 (type: string), _col9 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2809,31 +2161,26 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 352 Data size: 35304 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 @@ -2842,20 +2189,14 @@ STAGE PLANS: 0 {VALUE._col0} 1 2 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col8 - type: string - expr: _col9 - type: bigint + expressions: _col0 (type: string), _col8 (type: string), _col9 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2863,31 +2204,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2978,9 +2307,6 @@ JOIN ON xx.key=yy.key JOIN src zz ON yy.key=zz.key ORDER BY xx.key, yy.key, yy.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key))) (TOK_TABREF (TOK_TABNAME src) zz) (= (. (TOK_TABLE_OR_COL yy) key) (. (TOK_TABLE_OR_COL zz) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 @@ -2991,34 +2317,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - yy:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - yy:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -3026,25 +2342,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3052,41 +2363,27 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3094,50 +2391,31 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: zz + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - xx + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan - alias: xx Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - zz + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan - alias: zz + alias: xx + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -3147,20 +2425,14 @@ STAGE PLANS: 0 {VALUE._col0} 1 {VALUE._col0} {VALUE._col1} 2 - handleSkewJoin: false outputColumnNames: _col0, _col4, _col5 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string - expr: _col5 - type: bigint + expressions: _col0 (type: string), _col4 (type: string), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3168,31 +2440,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3275,9 +2535,6 @@ JOIN ON xx.key=yy.key JOIN src zz ON yy.key=zz.key ORDER BY xx.key, yy.key, yy.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key))) (TOK_TABREF (TOK_TABNAME src) zz) (= (. (TOK_TABLE_OR_COL yy) key) (. (TOK_TABLE_OR_COL zz) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3286,64 +2543,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx + Map Operator Tree: TableScan - alias: xx + alias: zz + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - yy:x + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - yy:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - zz + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan - alias: zz + alias: xx + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 3 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 176 Data size: 17652 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 352 Data size: 35304 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 @@ -3352,20 +2591,14 @@ STAGE PLANS: 0 {VALUE._col0} 1 {VALUE._col0} {VALUE._col1} 2 - handleSkewJoin: false outputColumnNames: _col0, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string - expr: _col5 - type: bigint + expressions: _col0 (type: string), _col4 (type: string), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3376,31 +2609,26 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 352 Data size: 35304 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 @@ -3409,20 +2637,14 @@ STAGE PLANS: 0 {VALUE._col0} 1 {VALUE._col0} {VALUE._col1} 2 - handleSkewJoin: false outputColumnNames: _col0, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string - expr: _col5 - type: bigint + expressions: _col0 (type: string), _col4 (type: string), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3430,31 +2652,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3547,9 +2757,6 @@ FROM JOIN src z ON tmp.key=z.key ORDER BY tmp.key, tmp.sum1, tmp.sum2, z.key, z.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key) key) (TOK_SELEXPR (TOK_FUNCTION sum (. (TOK_TABLE_OR_COL xx) cnt)) sum1) (TOK_SELEXPR (TOK_FUNCTION sum (. (TOK_TABLE_OR_COL yy) cnt)) sum2)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL xx) key)))) tmp) (TOK_TABREF (TOK_TABNAME src) z) (= (. (TOK_TABLE_OR_COL tmp) key) (. (TOK_TABLE_OR_COL z) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp) sum1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp) sum2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL tmp) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL tmp) sum1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL tmp) sum2)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL z) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL z) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-7 @@ -3562,56 +2769,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:yy:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3619,37 +2809,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp:$INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: bigint - tmp:$INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -3657,30 +2831,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col1) - expr: sum(_col3) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: sum(_col1), sum(_col3) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3688,46 +2852,27 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 1546 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 1546 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3735,42 +2880,23 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: bigint - z + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 15 Data size: 1546 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) TableScan alias: z + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -3778,24 +2904,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3803,39 +2919,19 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: string - expr: _col4 - type: string + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string) sort order: +++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: string - expr: _col4 - type: string + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3843,56 +2939,39 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: - tmp:xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3983,9 +3062,6 @@ FROM JOIN src z ON tmp.key=z.key ORDER BY tmp.key, tmp.sum1, tmp.sum2, z.key, z.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key) key) (TOK_SELEXPR (TOK_FUNCTION sum (. (TOK_TABLE_OR_COL xx) cnt)) sum1) (TOK_SELEXPR (TOK_FUNCTION sum (. (TOK_TABLE_OR_COL yy) cnt)) sum2)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL xx) key)))) tmp) (TOK_TABREF (TOK_TABNAME src) z) (= (. (TOK_TABLE_OR_COL tmp) key) (. (TOK_TABLE_OR_COL z) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp) sum1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp) sum2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL tmp) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL tmp) sum1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL tmp) sum2)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL z) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL z) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3994,275 +3070,185 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:xx:x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint - tmp:yy:y + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan - alias: y + alias: z + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + TableScan + alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: bigint - z - TableScan - alias: z - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 89 Data size: 11840 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 44 Data size: 5853 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 44 Data size: 5853 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 88 Data size: 11706 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col1) - expr: sum(_col3) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: sum(_col1), sum(_col3) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 89 Data size: 11840 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 44 Data size: 5853 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 44 Data size: 5853 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 88 Data size: 11706 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col1) - expr: sum(_col3) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: sum(_col1), sum(_col3) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 89 Data size: 11840 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Mux Operator + Statistics: Num rows: 89 Data size: 11840 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4270,39 +3256,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: string - expr: _col4 - type: string + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string) sort order: +++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: string - expr: _col4 - type: string + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4397,9 +3363,6 @@ JOIN (SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL x) value)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -4412,36 +3375,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - yy:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -4449,29 +3400,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4479,49 +3421,27 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4529,41 +3449,21 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -4571,24 +3471,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4596,39 +3486,19 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) sort order: +++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: bigint + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4636,34 +3506,24 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - xx:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -4671,25 +3531,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4697,41 +3552,27 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4792,9 +3633,6 @@ JOIN (SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL x) value)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -4805,36 +3643,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - yy:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -4842,29 +3668,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4872,49 +3689,27 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4922,138 +3717,94 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 2 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint - xx:x + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - xx:y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 35 Data size: 3577 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 35 Data size: 3577 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Mux Operator + Statistics: Num rows: 35 Data size: 3577 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5061,39 +3812,19 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) sort order: +++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: bigint + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5154,9 +3885,6 @@ JOIN (SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL x) value)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -5166,56 +3894,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - yy:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -5226,30 +3935,20 @@ STAGE PLANS: yy:y TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5257,66 +3956,44 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint - xx:x + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -5327,78 +4004,56 @@ STAGE PLANS: xx:y TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 33 Data size: 3382 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1639 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1639 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 49 Data size: 5021 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Mux Operator + Statistics: Num rows: 49 Data size: 5021 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5406,39 +4061,19 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) sort order: +++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: bigint + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/correlationoptimizer7.q.out ql/src/test/results/clientpositive/correlationoptimizer7.q.out index 1f533a5..c4bd5ef 100644 --- ql/src/test/results/clientpositive/correlationoptimizer7.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer7.q.out @@ -14,9 +14,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt JOIN src1 yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_TABREF (TOK_TABNAME src1) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) value))))) - STAGE DEPENDENCIES: Stage-4 is a root stage Stage-2 depends on stages: Stage-4 @@ -25,48 +22,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -77,26 +63,20 @@ STAGE PLANS: xx:y TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -104,8 +84,7 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -113,44 +92,20 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] + 0 _col0 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Position of Big Table: 0 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -161,11 +116,13 @@ STAGE PLANS: yy TableScan alias: yy + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Extract + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -226,9 +183,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt JOIN src1 yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_TABREF (TOK_TABNAME src1) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -237,65 +191,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: + TableScan + alias: yy + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint - yy - TableScan - alias: yy - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -306,74 +241,56 @@ STAGE PLANS: xx:y TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 64 Data size: 6609 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 32 Data size: 3304 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 32 Data size: 3304 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 96 Data size: 9913 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Mux Operator + Statistics: Num rows: 96 Data size: 9913 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -381,35 +298,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -482,9 +383,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt JOIN src1 yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_TABREF (TOK_TABNAME src1) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) value))))) - STAGE DEPENDENCIES: Stage-4 is a root stage Stage-2 depends on stages: Stage-4 @@ -493,48 +391,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -545,26 +432,20 @@ STAGE PLANS: xx:y TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -572,8 +453,7 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -581,44 +461,20 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] + 0 _col0 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Position of Big Table: 0 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -629,11 +485,13 @@ STAGE PLANS: yy TableScan alias: yy + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Extract + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -694,9 +552,6 @@ FROM (SELECT x.key AS key, count(1) AS cnt JOIN src1 yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_TABREF (TOK_TABNAME src1) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -705,65 +560,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: + TableScan + alias: yy + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint - yy - TableScan - alias: yy - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -774,74 +610,56 @@ STAGE PLANS: xx:y TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 64 Data size: 6609 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 32 Data size: 3304 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 32 Data size: 3304 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 96 Data size: 9913 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Mux Operator + Statistics: Num rows: 96 Data size: 9913 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -849,35 +667,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/correlationoptimizer8.q.out ql/src/test/results/clientpositive/correlationoptimizer8.q.out index 4dd3a7f..23a5405 100644 --- ql/src/test/results/clientpositive/correlationoptimizer8.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer8.q.out @@ -28,9 +28,6 @@ FROM ) subq1 JOIN src1 x ON (x.key = subq1.key) ORDER BY x.key, x.value, subq1.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key))))) subq1) (TOK_TABREF (TOK_TABNAME src1) x) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL x) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL x) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq1) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -41,60 +38,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:subq1-subquery1:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 20) - type: boolean + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -102,54 +81,34 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 18 Data size: 1802 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint -#### A masked pattern was here #### + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 1802 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan Union + Statistics: Num rows: 18 Data size: 1802 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint - x + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 1802 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -157,20 +116,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 19 Data size: 1982 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col1 - type: bigint + expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 19 Data size: 1982 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -178,31 +131,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + Statistics: Num rows: 19 Data size: 1982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 19 Data size: 1982 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 19 Data size: 1982 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -210,60 +151,42 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:subq1-subquery2:x1 + Map Operator Tree: TableScan alias: x1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 100) - type: boolean + predicate: (key > 100) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -326,9 +249,6 @@ FROM ) subq1 JOIN src1 x ON (x.key = subq1.key) ORDER BY x.key, x.value, subq1.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key))))) subq1) (TOK_TABREF (TOK_TABNAME src1) x) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL x) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL x) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq1) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -337,197 +257,143 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:subq1-subquery1:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 20) - type: boolean + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint - null-subquery2:subq1-subquery2:x1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan alias: x1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 100) - type: boolean + predicate: (key > 100) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: bigint - x + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 39 Data size: 4022 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1959 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1959 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 38 Data size: 3918 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 77 Data size: 7940 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col1 - type: bigint + expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1959 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1959 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 38 Data size: 3918 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 77 Data size: 7940 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col1 - type: bigint + expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Mux Operator + Statistics: Num rows: 77 Data size: 7940 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col1 - type: bigint + expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -535,31 +401,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -634,9 +488,6 @@ FROM ) subq1 LEFT OUTER JOIN src1 x ON (x.key = subq1.key) ORDER BY subq1.key, subq1.cnt, x.key, x.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) value) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) value))))) subq1) (TOK_TABREF (TOK_TABNAME src1) x) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq1) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL x) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL x) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -647,60 +498,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:subq1-subquery1:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 20) - type: boolean + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -708,58 +541,34 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint -#### A masked pattern was here #### + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan Union + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - x + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -767,22 +576,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 9 Data size: 991 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 9 Data size: 991 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -790,35 +591,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + Statistics: Num rows: 9 Data size: 991 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 9 Data size: 991 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 991 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -826,60 +611,42 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:subq1-subquery2:x1 + Map Operator Tree: TableScan alias: x1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 100) - type: boolean + predicate: (key > 100) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: value - type: string + aggregations: count(1) + keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -962,9 +729,6 @@ FROM ) subq1 LEFT OUTER JOIN src1 x ON (x.key = subq1.key) ORDER BY subq1.key, subq1.cnt, x.key, x.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) value) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) value))))) subq1) (TOK_TABREF (TOK_TABNAME src1) x) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq1) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL x) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL x) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -973,203 +737,143 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:subq1-subquery1:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 20) - type: boolean + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint - null-subquery2:subq1-subquery2:x1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan alias: x1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 100) - type: boolean + predicate: (key > 100) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: value - type: string + aggregations: count(1) + keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: bigint - x + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 20 Data size: 2119 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1059 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1059 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 20 Data size: 2118 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 40 Data size: 4237 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1059 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1059 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 20 Data size: 2118 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 40 Data size: 4237 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Mux Operator + Statistics: Num rows: 40 Data size: 4237 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1177,35 +881,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1294,9 +982,6 @@ FROM ) subq1 JOIN src1 x ON (x.key = subq1.key) ORDER BY x.key, x.value, subq1.cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key) (. (TOK_TABLE_OR_COL x1) value))))) subq1) (TOK_TABREF (TOK_TABNAME src1) x) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL x) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL x) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq1) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -1307,60 +992,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:subq1-subquery1:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 20) - type: boolean + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1368,54 +1035,34 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 13 Data size: 1702 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint -#### A masked pattern was here #### + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1702 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan Union + Statistics: Num rows: 13 Data size: 1702 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col1 - type: bigint - x + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1702 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1423,20 +1070,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 1872 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col1 - type: bigint + expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1872 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1444,31 +1085,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + Statistics: Num rows: 14 Data size: 1872 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 14 Data size: 1872 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 1872 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1476,70 +1105,42 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:subq1-subquery2:x1 + Map Operator Tree: TableScan alias: x1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 100) - type: boolean + predicate: (key > 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + aggregations: count(1) + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1573,9 +1174,6 @@ FROM ) subq1 FULL OUTER JOIN src1 x ON (x.key = subq1.key) ORDER BY subq1.key, subq1.value, x.key, x.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_INT (. (TOK_TABLE_OR_COL x) key)) key) (TOK_SELEXPR (TOK_FUNCTION count 1) value)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1) key) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (. (TOK_TABLE_OR_COL x1) key)) value)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key))))) subq1) (TOK_TABREF (TOK_TABNAME src1) x) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq1) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL x) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL x) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -1586,60 +1184,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:subq1-subquery1:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 20) - type: boolean + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToLong(UDFToInteger(_col0)) - type: bigint - expr: _col1 - type: bigint + expressions: UDFToLong(UDFToInteger(_col0)) (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1647,58 +1227,34 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 18 Data size: 1802 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(_col0) - type: double + key expressions: UDFToDouble(_col0) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(_col0) - type: double - tag: 0 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint -#### A masked pattern was here #### + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 18 Data size: 1802 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) TableScan Union + Statistics: Num rows: 18 Data size: 1802 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(_col0) - type: double + key expressions: UDFToDouble(_col0) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(_col0) - type: double - tag: 0 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - x + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 18 Data size: 1802 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) TableScan alias: x + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(key) - type: double + key expressions: UDFToDouble(key) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(key) - type: double - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1706,22 +1262,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 19 Data size: 1982 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 19 Data size: 1982 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1729,35 +1277,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + Statistics: Num rows: 19 Data size: 1982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 19 Data size: 1982 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 19 Data size: 1982 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1765,60 +1297,42 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:subq1-subquery2:x1 + Map Operator Tree: TableScan alias: x1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 100) - type: boolean + predicate: (key > 100) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: bigint - expr: UDFToLong(UDFToInteger(_col0)) - type: bigint + expressions: _col1 (type: bigint), UDFToLong(UDFToInteger(_col0)) (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/correlationoptimizer9.q.out ql/src/test/results/clientpositive/correlationoptimizer9.q.out index 6b02889..31b1be6 100644 --- ql/src/test/results/clientpositive/correlationoptimizer9.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer9.q.out @@ -37,9 +37,6 @@ POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) c1) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) c1) 120)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) c1)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) c2) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) c2) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) c2)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -50,60 +47,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:x1 + Map Operator Tree: TableScan alias: x1 + Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (c2 > 100) - type: boolean + predicate: (c2 > 100) (type: boolean) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: c2 - type: int + expressions: c2 (type: int) outputColumnNames: c2 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: c2 - type: int + aggregations: count(1) + keys: c2 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -111,39 +90,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -151,22 +112,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col2 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -174,35 +127,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: bigint - expr: _col3 - type: bigint + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint) sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: bigint - expr: _col3 - type: bigint + Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -210,60 +147,42 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (c1 < 120) - type: boolean + predicate: (c1 < 120) (type: boolean) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: c1 - type: int + expressions: c1 (type: int) outputColumnNames: c1 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: c1 - type: int + aggregations: count(1) + keys: c1 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -326,9 +245,6 @@ POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) c1) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) c1) 120)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) c1)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) c2) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) c2) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) c2)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -337,157 +253,110 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan - alias: x + alias: x1 + Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (c1 < 120) - type: boolean + predicate: (c2 > 100) (type: boolean) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: c1 - type: int - outputColumnNames: c1 + expressions: c2 (type: int) + outputColumnNames: c2 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: c1 - type: int + aggregations: count(1) + keys: c2 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col1 - type: bigint - yy:x1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan - alias: x1 + alias: x + Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (c2 > 100) - type: boolean + predicate: (c1 < 120) (type: boolean) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: c2 - type: int - outputColumnNames: c2 + expressions: c1 (type: int) + outputColumnNames: c1 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: c2 - type: int + aggregations: count(1) + keys: c1 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 684 Data size: 15278 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 684 Data size: 15278 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col2 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 684 Data size: 15278 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col2 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -495,35 +364,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: bigint - expr: _col3 - type: bigint + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint) sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: bigint - expr: _col3 - type: bigint + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -584,9 +437,6 @@ POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) c1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) c3) key2) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) c1) 120)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) c1) (. (TOK_TABLE_OR_COL x) c3)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) c1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) c3) key2) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) c2) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) c1) (. (TOK_TABLE_OR_COL x1) c3)))) yy) (AND (= (. (TOK_TABLE_OR_COL xx) key1) (. (TOK_TABLE_OR_COL yy) key1)) (== (. (TOK_TABLE_OR_COL xx) key2) (. (TOK_TABLE_OR_COL yy) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key2)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key2)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -597,72 +447,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - yy:x1 + Map Operator Tree: TableScan alias: x1 + Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (c2 > 100) - type: boolean + predicate: (c2 > 100) (type: boolean) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: c1 - type: int - expr: c3 - type: string + expressions: c1 (type: int), c3 (type: string) outputColumnNames: c1, c3 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: c1 - type: int - expr: c3 - type: string + aggregations: count(1) + keys: c1 (type: int), c3 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -670,51 +490,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: string - tag: 1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -722,26 +512,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col3 - type: int - expr: _col4 - type: string - expr: _col2 - type: bigint - expr: _col5 - type: bigint + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -749,43 +527,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - expr: _col4 - type: bigint - expr: _col5 - type: bigint + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: bigint) sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - expr: _col4 - type: bigint - expr: _col5 - type: bigint + Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -793,72 +547,42 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (c1 < 120) - type: boolean + predicate: (c1 < 120) (type: boolean) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: c1 - type: int - expr: c3 - type: string + expressions: c1 (type: int), c3 (type: string) outputColumnNames: c1, c3 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: c1 - type: int - expr: c3 - type: string + aggregations: count(1) + keys: c1 (type: int), c3 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -919,9 +643,6 @@ POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) c1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) c3) key2) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) c1) 120)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) c1) (. (TOK_TABLE_OR_COL x) c3)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) c1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) c3) key2) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) c2) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) c1) (. (TOK_TABLE_OR_COL x1) c3)))) yy) (AND (= (. (TOK_TABLE_OR_COL xx) key1) (. (TOK_TABLE_OR_COL yy) key1)) (== (. (TOK_TABLE_OR_COL xx) key2) (. (TOK_TABLE_OR_COL yy) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key2)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key2)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -930,189 +651,110 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - xx:x + Map Operator Tree: TableScan - alias: x + alias: x1 + Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (c1 < 120) - type: boolean + predicate: (c2 > 100) (type: boolean) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: c1 - type: int - expr: c3 - type: string + expressions: c1 (type: int), c3 (type: string) outputColumnNames: c1, c3 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: c1 - type: int - expr: c3 - type: string + aggregations: count(1) + keys: c1 (type: int), c3 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col2 - type: bigint - yy:x1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) TableScan - alias: x1 + alias: x + Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (c2 > 100) - type: boolean + predicate: (c1 < 120) (type: boolean) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: c1 - type: int - expr: c3 - type: string + expressions: c1 (type: int), c3 (type: string) outputColumnNames: c1, c3 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: c1 - type: int - expr: c3 - type: string + aggregations: count(1) + keys: c1 (type: int), c3 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: string - tag: 1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Demux Operator + Statistics: Num rows: 684 Data size: 15278 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 684 Data size: 15278 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col3 - type: int - expr: _col4 - type: string - expr: _col2 - type: bigint - expr: _col5 - type: bigint + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 684 Data size: 15278 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col3 - type: int - expr: _col4 - type: string - expr: _col2 - type: bigint - expr: _col5 - type: bigint + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1120,43 +762,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - expr: _col4 - type: bigint - expr: _col5 - type: bigint + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: bigint) sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - expr: _col4 - type: bigint - expr: _col5 - type: bigint + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/count.q.out ql/src/test/results/clientpositive/count.q.out index 7a214f1..3ca01b2 100644 --- ql/src/test/results/clientpositive/count.q.out +++ ql/src/test/results/clientpositive/count.q.out @@ -28,9 +28,6 @@ PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) f PREHOOK: type: QUERY POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME abcd))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL d)))) (TOK_GROUPBY (TOK_TABLE_OR_COL a)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -38,82 +35,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - abcd + Map Operator Tree: TableScan alias: abcd + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: a - type: int - expr: b - type: int - expr: c - type: int - expr: d - type: int + expressions: a (type: int), b (type: int), c (type: int), d (type: int) outputColumnNames: a, b, c, d + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT b) - expr: count(DISTINCT c) - expr: sum(d) - bucketGroup: false - keys: - expr: a - type: int - expr: b - type: int - expr: c - type: int + aggregations: count(DISTINCT b), count(DISTINCT c), sum(d) + keys: a (type: int), b (type: int), c (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: int + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: count(DISTINCT KEY._col1:1._col0) - expr: sum(VALUE._col2) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col2) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -139,9 +94,6 @@ PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), PREHOOK: type: QUERY POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME abcd))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL a))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL d))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL a))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL d))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL b) (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL c) (TOK_TABLE_OR_COL d))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL d))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL b) (TOK_TABLE_OR_COL d))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b) (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL b) (TOK_TABLE_OR_COL c) (TOK_TABLE_OR_COL d))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL c) (TOK_TABLE_OR_COL d))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b) (TOK_TABLE_OR_COL d))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b) (TOK_TABLE_OR_COL c) (TOK_TABLE_OR_COL d)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -149,186 +101,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - abcd + Map Operator Tree: TableScan alias: abcd + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: a - type: int - expr: b - type: int - expr: c - type: int - expr: d - type: int + expressions: a (type: int), b (type: int), c (type: int), d (type: int) outputColumnNames: a, b, c, d + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - expr: count() - expr: count(a) - expr: count(b) - expr: count(c) - expr: count(d) - expr: count(DISTINCT a) - expr: count(DISTINCT b) - expr: count(DISTINCT c) - expr: count(DISTINCT d) - expr: count(DISTINCT a, b) - expr: count(DISTINCT b, c) - expr: count(DISTINCT c, d) - expr: count(DISTINCT a, d) - expr: count(DISTINCT a, c) - expr: count(DISTINCT b, d) - expr: count(DISTINCT a, b, c) - expr: count(DISTINCT b, c, d) - expr: count(DISTINCT a, c, d) - expr: count(DISTINCT a, b, d) - expr: count(DISTINCT a, b, c, d) - bucketGroup: false - keys: - expr: a - type: int - expr: b - type: int - expr: c - type: int - expr: d - type: int + aggregations: count(1), count(), count(a), count(b), count(c), count(d), count(DISTINCT a), count(DISTINCT b), count(DISTINCT c), count(DISTINCT d), count(DISTINCT a, b), count(DISTINCT b, c), count(DISTINCT c, d), count(DISTINCT a, d), count(DISTINCT a, c), count(DISTINCT b, d), count(DISTINCT a, b, c), count(DISTINCT b, c, d), count(DISTINCT a, c, d), count(DISTINCT a, b, d), count(DISTINCT a, b, c, d) + keys: a (type: int), b (type: int), c (type: int), d (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: int - expr: _col3 - type: int + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) sort order: ++++ - tag: -1 - value expressions: - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint - expr: _col8 - type: bigint - expr: _col9 - type: bigint - expr: _col10 - type: bigint - expr: _col11 - type: bigint - expr: _col12 - type: bigint - expr: _col13 - type: bigint - expr: _col14 - type: bigint - expr: _col15 - type: bigint - expr: _col16 - type: bigint - expr: _col17 - type: bigint - expr: _col18 - type: bigint - expr: _col19 - type: bigint - expr: _col20 - type: bigint - expr: _col21 - type: bigint - expr: _col22 - type: bigint - expr: _col23 - type: bigint - expr: _col24 - type: bigint + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint), _col15 (type: bigint), _col16 (type: bigint), _col17 (type: bigint), _col18 (type: bigint), _col19 (type: bigint), _col20 (type: bigint), _col21 (type: bigint), _col22 (type: bigint), _col23 (type: bigint), _col24 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: count(VALUE._col1) - expr: count(VALUE._col2) - expr: count(VALUE._col3) - expr: count(VALUE._col4) - expr: count(VALUE._col5) - expr: count(DISTINCT KEY._col0:0._col0) - expr: count(DISTINCT KEY._col0:1._col0) - expr: count(DISTINCT KEY._col0:2._col0) - expr: count(DISTINCT KEY._col0:3._col0) - expr: count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1) - expr: count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1) - expr: count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1) - expr: count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1) - expr: count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1) - expr: count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1) - expr: count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2) - expr: count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2) - expr: count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2) - expr: count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2) - expr: count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) - bucketGroup: false + aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint - expr: _col8 - type: bigint - expr: _col9 - type: bigint - expr: _col10 - type: bigint - expr: _col11 - type: bigint - expr: _col12 - type: bigint - expr: _col13 - type: bigint - expr: _col14 - type: bigint - expr: _col15 - type: bigint - expr: _col16 - type: bigint - expr: _col17 - type: bigint - expr: _col18 - type: bigint - expr: _col19 - type: bigint - expr: _col20 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint), _col15 (type: bigint), _col16 (type: bigint), _col17 (type: bigint), _col18 (type: bigint), _col19 (type: bigint), _col20 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -351,9 +155,6 @@ PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) f PREHOOK: type: QUERY POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME abcd))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL d)))) (TOK_GROUPBY (TOK_TABLE_OR_COL a)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -361,63 +162,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - abcd + Map Operator Tree: TableScan alias: abcd + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: a - type: int - expr: b - type: int - expr: c - type: int - expr: d - type: int + expressions: a (type: int), b (type: int), c (type: int), d (type: int) outputColumnNames: a, b, c, d + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: a - type: int - expr: b - type: int - expr: c - type: int + key expressions: a (type: int), b (type: int), c (type: int) sort order: +++ - Map-reduce partition columns: - expr: a - type: int - tag: -1 - value expressions: - expr: d - type: int + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + value expressions: d (type: int) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: count(DISTINCT KEY._col1:1._col0) - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col0) + keys: KEY._col0 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -443,9 +215,6 @@ PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), PREHOOK: type: QUERY POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME abcd))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL a))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL d))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL a))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL d))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL b) (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL c) (TOK_TABLE_OR_COL d))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL d))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL b) (TOK_TABLE_OR_COL d))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b) (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL b) (TOK_TABLE_OR_COL c) (TOK_TABLE_OR_COL d))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL c) (TOK_TABLE_OR_COL d))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b) (TOK_TABLE_OR_COL d))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b) (TOK_TABLE_OR_COL c) (TOK_TABLE_OR_COL d)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -453,111 +222,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - abcd + Map Operator Tree: TableScan alias: abcd + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: a - type: int - expr: b - type: int - expr: c - type: int - expr: d - type: int + expressions: a (type: int), b (type: int), c (type: int), d (type: int) outputColumnNames: a, b, c, d + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: a - type: int - expr: b - type: int - expr: c - type: int - expr: d - type: int + key expressions: a (type: int), b (type: int), c (type: int), d (type: int) sort order: ++++ - tag: -1 - value expressions: - expr: 1 - type: int + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + value expressions: 1 (type: int) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(1) - expr: count() - expr: count(KEY._col0:0._col0) - expr: count(KEY._col0:1._col0) - expr: count(KEY._col0:2._col0) - expr: count(KEY._col0:3._col0) - expr: count(DISTINCT KEY._col0:0._col0) - expr: count(DISTINCT KEY._col0:1._col0) - expr: count(DISTINCT KEY._col0:2._col0) - expr: count(DISTINCT KEY._col0:3._col0) - expr: count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1) - expr: count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1) - expr: count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1) - expr: count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1) - expr: count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1) - expr: count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1) - expr: count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2) - expr: count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2) - expr: count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2) - expr: count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2) - expr: count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) - bucketGroup: false + aggregations: count(1), count(), count(KEY._col0:0._col0), count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint - expr: _col8 - type: bigint - expr: _col9 - type: bigint - expr: _col10 - type: bigint - expr: _col11 - type: bigint - expr: _col12 - type: bigint - expr: _col13 - type: bigint - expr: _col14 - type: bigint - expr: _col15 - type: bigint - expr: _col16 - type: bigint - expr: _col17 - type: bigint - expr: _col18 - type: bigint - expr: _col19 - type: bigint - expr: _col20 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint), _col15 (type: bigint), _col16 (type: bigint), _col17 (type: bigint), _col18 (type: bigint), _col19 (type: bigint), _col20 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/create_genericudaf.q.out ql/src/test/results/clientpositive/create_genericudaf.q.out index 47ef896..96fe2fa 100644 --- ql/src/test/results/clientpositive/create_genericudaf.q.out +++ ql/src/test/results/clientpositive/create_genericudaf.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATEFUNCTION POSTHOOK: query: EXPLAIN CREATE TEMPORARY FUNCTION test_avg AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage' POSTHOOK: type: CREATEFUNCTION -ABSTRACT SYNTAX TREE: - (TOK_CREATEFUNCTION test_avg 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage') - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -29,9 +26,6 @@ SELECT test_avg(substr(value,5)) FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION test_avg 1)) (TOK_SELEXPR (TOK_FUNCTION test_avg (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 5)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -39,48 +33,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: test_avg(1) - expr: test_avg(substr(value, 5)) - bucketGroup: false + aggregations: test_avg(1), test_avg(substr(value, 5)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct - expr: _col1 - type: struct + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: test_avg(VALUE._col0) - expr: test_avg(VALUE._col1) - bucketGroup: false + aggregations: test_avg(VALUE._col0), test_avg(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: double + expressions: _col0 (type: double), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/create_genericudf.q.out ql/src/test/results/clientpositive/create_genericudf.q.out index 7f0861f..f86ad6a 100644 --- ql/src/test/results/clientpositive/create_genericudf.q.out +++ ql/src/test/results/clientpositive/create_genericudf.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATEFUNCTION POSTHOOK: query: EXPLAIN CREATE TEMPORARY FUNCTION test_translate AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFTestTranslate' POSTHOOK: type: CREATEFUNCTION -ABSTRACT SYNTAX TREE: - (TOK_CREATEFUNCTION test_translate 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFTestTranslate') - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/create_udaf.q.out ql/src/test/results/clientpositive/create_udaf.q.out index 25b65ec..807a8bc 100644 --- ql/src/test/results/clientpositive/create_udaf.q.out +++ ql/src/test/results/clientpositive/create_udaf.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATEFUNCTION POSTHOOK: query: EXPLAIN CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax' POSTHOOK: type: CREATEFUNCTION -ABSTRACT SYNTAX TREE: - (TOK_CREATEFUNCTION test_max 'org.apache.hadoop.hive.ql.udf.UDAFTestMax') - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/create_union_table.q.out ql/src/test/results/clientpositive/create_union_table.q.out index 49dbbab..1f5b0a6 100644 --- ql/src/test/results/clientpositive/create_union_table.q.out +++ ql/src/test/results/clientpositive/create_union_table.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: explain create table abc(mydata uniontype,struct>, strct struct) POSTHOOK: type: CREATETABLE -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME abc) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL mydata (TOK_UNIONTYPE (TOK_COLTYPELIST TOK_INT TOK_DOUBLE (TOK_LIST TOK_STRING) (TOK_STRUCT (TOK_TABCOLLIST (TOK_TABCOL a TOK_INT) (TOK_TABCOL b TOK_STRING)))))) (TOK_TABCOL strct (TOK_STRUCT (TOK_TABCOLLIST (TOK_TABCOL a TOK_INT) (TOK_TABCOL b TOK_STRING) (TOK_TABCOL c TOK_STRING)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -15,12 +12,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: mydata uniontype,struct>, strct struct - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: abc - isExternal: false PREHOOK: query: create table abc(mydata uniontype,struct>, strct struct) diff --git ql/src/test/results/clientpositive/create_view.q.out ql/src/test/results/clientpositive/create_view.q.out index 0ef27da..bcab778 100644 --- ql/src/test/results/clientpositive/create_view.q.out +++ ql/src/test/results/clientpositive/create_view.q.out @@ -144,9 +144,6 @@ POSTHOOK: query: -- test EXPLAIN output for CREATE VIEW EXPLAIN CREATE VIEW view0(valoo) AS SELECT upper(value) FROM src WHERE key=86 POSTHOOK: type: CREATEVIEW -ABSTRACT SYNTAX TREE: - (TOK_CREATEVIEW (TOK_TABNAME view0) (TOK_TABCOLNAME (TOK_TABCOL valoo TOK_NULL)) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION upper (TOK_TABLE_OR_COL value)))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -154,8 +151,6 @@ STAGE PLANS: Stage: Stage-0 Create View Operator: Create View - if not exists: false - is alter view as select: false or replace: false columns: valoo string expanded text: SELECT `_c0` AS `valoo` FROM (SELECT upper(`src`.`value`) FROM `default`.`src` WHERE `src`.`key`=86) `view0` @@ -170,9 +165,6 @@ POSTHOOK: query: -- make sure EXPLAIN works with a query which references a view EXPLAIN SELECT * from view2 where key=18 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME view2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 18)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -180,24 +172,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - view2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 18) - type: boolean + predicate: (key = 18) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/create_view_translate.q.out ql/src/test/results/clientpositive/create_view_translate.q.out index 3c98794..7d8f938 100644 --- ql/src/test/results/clientpositive/create_view_translate.q.out +++ ql/src/test/results/clientpositive/create_view_translate.q.out @@ -114,9 +114,6 @@ PREHOOK: type: CREATEVIEW POSTHOOK: query: explain CREATE VIEW priceview AS SELECT items.id, items.info['price'] FROM items POSTHOOK: type: CREATEVIEW -ABSTRACT SYNTAX TREE: - (TOK_CREATEVIEW (TOK_TABNAME priceview) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME items))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL items) id)) (TOK_SELEXPR ([ (. (TOK_TABLE_OR_COL items) info) 'price')))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -124,8 +121,6 @@ STAGE PLANS: Stage: Stage-0 Create View Operator: Create View - if not exists: false - is alter view as select: false or replace: false columns: id int, _c1 string expanded text: SELECT `items`.`id`, `items`.`info`['price'] FROM `default`.`items` diff --git ql/src/test/results/clientpositive/cross_join.q.out ql/src/test/results/clientpositive/cross_join.q.out index a1f25cc..294b8d5 100644 --- ql/src/test/results/clientpositive/cross_join.q.out +++ ql/src/test/results/clientpositive/cross_join.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- current explain select src.key from src join src src2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src) src2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,22 +11,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan - alias: src + alias: src2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - tag: 0 - value expressions: - expr: key - type: string - src2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE TableScan - alias: src2 + alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: 1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -37,16 +32,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -62,9 +56,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- ansi cross join explain select src.key from src cross join src src2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_CROSSJOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src) src2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -72,22 +63,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan - alias: src + alias: src2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - tag: 0 - value expressions: - expr: key - type: string - src2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE TableScan - alias: src2 + alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: 1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -95,16 +84,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -120,9 +108,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- appending condition is allowed explain select src.key from src cross join src src2 on src.key=src2.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_CROSSJOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src) src2) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -130,34 +115,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan - alias: src + alias: src2 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - src2 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan - alias: src2 + alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -165,16 +140,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ctas_colname.q.out ql/src/test/results/clientpositive/ctas_colname.q.out index a15b698..3d568ab 100644 --- ql/src/test/results/clientpositive/ctas_colname.q.out +++ ql/src/test/results/clientpositive/ctas_colname.q.out @@ -12,9 +12,6 @@ POSTHOOK: query: -- HIVE-4392, column aliases from expressionRR (GBY, etc.) are explain create table summary as select *, sum(key), count(value) from src POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME summary) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL value))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -24,54 +21,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(key) - expr: count(value) - bucketGroup: false + aggregations: sum(key), count(value) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: double - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: count(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: bigint - expr: _col0 - type: double - expr: _col1 - type: bigint + expressions: _col0 (type: double), _col1 (type: bigint), _col0 (type: double), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -88,12 +67,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: _col0 double, _col1 bigint, _c1 double, _c2 bigint - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: summary - isExternal: false Stage: Stage-2 Stats-Aggr Operator @@ -158,9 +134,6 @@ POSTHOOK: query: -- window functions explain create table x4 as select *, rank() over(partition by key order by value) as rr from src1 POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME x4) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)))))) rr))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -170,41 +143,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1 + Map Operator Tree: TableScan alias: src1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - tag: -1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE PTF Operator + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _wcol0 - type: int + expressions: _col0 (type: string), _col1 (type: string), _wcol0 (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -221,12 +181,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: key string, value string, rr int - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: x4 - isExternal: false Stage: Stage-2 Stats-Aggr Operator @@ -312,9 +269,6 @@ PREHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: query: explain create table x5 as select *, lead(key,1) over(partition by key order by value) as lead1 from src limit 20 POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME x5) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_FUNCTION lead (TOK_TABLE_OR_COL key) 1 (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)))))) lead1)) (TOK_LIMIT 20)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -325,42 +279,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - tag: -1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE PTF Operator + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _wcol0 - type: string + expressions: _col0 (type: string), _col1 (type: string), _wcol0 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -368,25 +310,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -403,12 +341,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: key string, value string, lead1 string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: x5 - isExternal: false Stage: Stage-3 Stats-Aggr Operator @@ -491,9 +426,6 @@ POSTHOOK: query: -- sub queries explain create table x6 as select * from (select *, max(key) from src1) a POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME x6) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL key)))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -503,44 +435,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src1 + Map Operator Tree: TableScan alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: max(key) - bucketGroup: false + aggregations: max(key) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - bucketGroup: false + aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col0 - type: string + expressions: _col0 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -557,12 +481,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: _col0 string, _c1 string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: x6 - isExternal: false Stage: Stage-2 Stats-Aggr Operator @@ -623,9 +544,6 @@ PREHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: query: explain create table x7 as select * from (select * from src group by key) a POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME x7) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -635,47 +553,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -692,12 +600,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: _col0 string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: x7 - isExternal: false Stage: Stage-2 Stats-Aggr Operator @@ -1065,9 +970,6 @@ PREHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: query: explain create table x8 as select * from (select * from src group by key having key < 9) a POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME x8) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_HAVING (< (TOK_TABLE_OR_COL key) 9)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1077,51 +979,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 9) - type: boolean + predicate: (key < 9) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1138,12 +1029,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: _col0 string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: x8 - isExternal: false Stage: Stage-2 Stats-Aggr Operator @@ -1207,9 +1095,6 @@ PREHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: query: explain create table x9 as select * from (select max(value),key from src group by key having key < 9 AND max(value) IS NOT NULL) a POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME x9) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL value))) (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_HAVING (AND (< (TOK_TABLE_OR_COL key) 9) (TOK_FUNCTION TOK_ISNOTNULL (TOK_FUNCTION max (TOK_TABLE_OR_COL value))))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1219,66 +1104,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 9) - type: boolean + predicate: (key < 9) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: max(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: max(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((_col0 < 9) and _col1 is not null) - type: boolean + predicate: ((_col0 < 9) and _col1 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col0 - type: string + expressions: _col1 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1295,12 +1160,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: _c0 string, key string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: x9 - isExternal: false Stage: Stage-2 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/ctas_hadoop20.q.out ql/src/test/results/clientpositive/ctas_hadoop20.q.out index e732ae8..0ec0af5 100644 --- ql/src/test/results/clientpositive/ctas_hadoop20.q.out +++ ql/src/test/results/clientpositive/ctas_hadoop20.q.out @@ -19,9 +19,6 @@ PREHOOK: query: explain create table nzhang_CTAS1 as select key k, value from sr PREHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: query: explain create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME nzhang_CTAS1) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) k) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL k)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -32,36 +29,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -69,28 +57,22 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -107,12 +89,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: k string, value string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: nzhang_CTAS1 - isExternal: false Stage: Stage-3 Stats-Aggr Operator @@ -180,9 +159,6 @@ PREHOOK: query: explain create table nzhang_ctas2 as select * from src sort by k PREHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: query: explain create table nzhang_ctas2 as select * from src sort by key, value limit 10 POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME nzhang_ctas2) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -193,36 +169,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -230,28 +197,22 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -268,12 +229,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: key string, value string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: nzhang_ctas2 - isExternal: false Stage: Stage-3 Stats-Aggr Operator @@ -341,9 +299,6 @@ PREHOOK: query: explain create table nzhang_ctas3 row format serde "org.apache.h PREHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: query: explain create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME nzhang_ctas3) TOK_LIKETABLE (TOK_TABLESERIALIZER (TOK_SERDENAME "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")) TOK_TBLRCFILE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (/ (TOK_TABLE_OR_COL key) 2) half_key) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_TABLE_OR_COL value) "_con") conb)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL half_key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL conb))) (TOK_LIMIT 10)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -354,36 +309,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key / 2) - type: double - expr: concat(value, '_con') - type: string + expressions: (key / 2) (type: double), concat(value, '_con') (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double - expr: _col1 - type: string + key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: double - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -391,28 +337,22 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: double - expr: _col1 - type: string + key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: double - expr: _col1 - type: string + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat @@ -429,13 +369,10 @@ STAGE PLANS: Create Table Operator: Create Table columns: half_key double, conb string - if not exists: false input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde name: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: nzhang_ctas3 - isExternal: false Stage: Stage-3 Stats-Aggr Operator @@ -503,9 +440,6 @@ PREHOOK: query: explain create table if not exists nzhang_ctas3 as select key, v PREHOOK: type: CREATETABLE POSTHOOK: query: explain create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 POSTHOOK: type: CREATETABLE -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME nzhang_ctas3) TOK_IFNOTEXISTS TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 2)))) - STAGE DEPENDENCIES: STAGE PLANS: @@ -569,9 +503,6 @@ PREHOOK: query: explain create table nzhang_ctas4 row format delimited fields te PREHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: query: explain create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME nzhang_ctas4) TOK_LIKETABLE (TOK_TABLEROWFORMAT (TOK_SERDEPROPS (TOK_TABLEROWFORMATFIELD ','))) TOK_TBLTEXTFILE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -582,36 +513,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -619,28 +541,22 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -658,12 +574,9 @@ STAGE PLANS: Create Table columns: key string, value string field delimiter: , - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: nzhang_ctas4 - isExternal: false Stage: Stage-3 Stats-Aggr Operator @@ -733,7 +646,44 @@ PREHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: query: explain extended create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10 POSTHOOK: type: CREATETABLE_AS_SELECT ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME nzhang_ctas5) TOK_LIKETABLE (TOK_TABLEROWFORMAT (TOK_SERDEPROPS (TOK_TABLEROWFORMATFIELD ',') (TOK_TABLEROWFORMATLINES '\012'))) TOK_TBLTEXTFILE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10)))) + +TOK_CREATETABLE + TOK_TABNAME + nzhang_ctas5 + TOK_LIKETABLE + TOK_TABLEROWFORMAT + TOK_SERDEPROPS + TOK_TABLEROWFORMATFIELD + ',' + TOK_TABLEROWFORMATLINES + '\012' + TOK_TBLTEXTFILE + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SORTBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + TOK_LIMIT + 10 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -745,37 +695,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + value expressions: _col0 (type: string), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -826,11 +760,10 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit - Statistics: - numRows: 10 dataSize: 2000 basicStatsState: COMPLETE colStatsState: NONE + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -851,25 +784,15 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Statistics: - numRows: 10 dataSize: 2000 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + value expressions: _col0 (type: string), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -898,18 +821,16 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 10 dataSize: 2000 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit - Statistics: - numRows: 10 dataSize: 2000 basicStatsState: COMPLETE colStatsState: NONE + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 10 dataSize: 2000 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -940,14 +861,11 @@ STAGE PLANS: Create Table columns: key string, value string field delimiter: , - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat line delimiter: - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: nzhang_ctas5 - isExternal: false Stage: Stage-3 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/ctas_uses_database_location.q.out ql/src/test/results/clientpositive/ctas_uses_database_location.q.out index a9b7810..420dddb 100644 --- ql/src/test/results/clientpositive/ctas_uses_database_location.q.out +++ ql/src/test/results/clientpositive/ctas_uses_database_location.q.out @@ -20,9 +20,6 @@ PREHOOK: query: EXPLAIN CREATE TABLE table_db1 AS SELECT * FROM default.src PREHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: query: EXPLAIN CREATE TABLE table_db1 AS SELECT * FROM default.src POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME table_db1) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME default src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -37,20 +34,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -76,24 +70,19 @@ STAGE PLANS: Create Table Operator: Create Table columns: key string, value string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: table_db1 - isExternal: false Stage: Stage-2 Stats-Aggr Operator Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -102,12 +91,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/decimal_precision.q.out ql/src/test/results/clientpositive/decimal_precision.q.out index fcc55e1..a80695c 100644 --- ql/src/test/results/clientpositive/decimal_precision.q.out +++ ql/src/test/results/clientpositive/decimal_precision.q.out @@ -521,9 +521,6 @@ PREHOOK: query: EXPLAIN SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT avg(dec), sum(dec) FROM DECIMAL_PRECISION POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_PRECISION))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL dec))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL dec)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -531,48 +528,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - decimal_precision + Map Operator Tree: TableScan alias: decimal_precision + Statistics: Num rows: 23 Data size: 2661 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: dec - type: decimal(20,10) + expressions: dec (type: decimal(20,10)) outputColumnNames: dec + Statistics: Num rows: 23 Data size: 2661 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: avg(dec) - expr: sum(dec) - bucketGroup: false + aggregations: avg(dec), sum(dec) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct - expr: _col1 - type: decimal(30,10) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) Reduce Operator Tree: Group By Operator - aggregations: - expr: avg(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: avg(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: decimal(24,14) - expr: _col1 - type: decimal(30,10) + expressions: _col0 (type: decimal(24,14)), _col1 (type: decimal(30,10)) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/decimal_udf.q.out ql/src/test/results/clientpositive/decimal_udf.q.out index f3dc7a9..74ae554 100644 --- ql/src/test/results/clientpositive/decimal_udf.q.out +++ ql/src/test/results/clientpositive/decimal_udf.q.out @@ -25,9 +25,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- addition EXPLAIN SELECT key + key FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -38,11 +35,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key + key) - type: decimal(21,10) + expressions: (key + key) (type: decimal(21,10)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key + key FROM DECIMAL_UDF @@ -95,9 +92,6 @@ PREHOOK: query: EXPLAIN SELECT key + value FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key + value FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -108,11 +102,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key + value) - type: decimal(21,10) + expressions: (key + value) (type: decimal(21,10)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key + value FROM DECIMAL_UDF @@ -165,9 +159,6 @@ PREHOOK: query: EXPLAIN SELECT key + (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key + (value/2) FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (/ (TOK_TABLE_OR_COL value) 2)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -178,11 +169,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key + (value / 2)) - type: decimal(21,10) + expressions: (key + (value / 2)) (type: decimal(21,10)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key + (value/2) FROM DECIMAL_UDF @@ -235,9 +226,6 @@ PREHOOK: query: EXPLAIN SELECT key + '1.0' FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key + '1.0' FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) '1.0'))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -248,11 +236,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key + '1.0') - type: double + expressions: (key + '1.0') (type: double) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key + '1.0' FROM DECIMAL_UDF @@ -307,9 +295,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- substraction EXPLAIN SELECT key - key FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (- (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -320,11 +305,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key - key) - type: decimal(21,10) + expressions: (key - key) (type: decimal(21,10)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key - key FROM DECIMAL_UDF @@ -377,9 +362,6 @@ PREHOOK: query: EXPLAIN SELECT key - value FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key - value FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (- (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -390,11 +372,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key - value) - type: decimal(21,10) + expressions: (key - value) (type: decimal(21,10)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key - value FROM DECIMAL_UDF @@ -447,9 +429,6 @@ PREHOOK: query: EXPLAIN SELECT key - (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key - (value/2) FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (- (TOK_TABLE_OR_COL key) (/ (TOK_TABLE_OR_COL value) 2)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -460,11 +439,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key - (value / 2)) - type: decimal(21,10) + expressions: (key - (value / 2)) (type: decimal(21,10)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key - (value/2) FROM DECIMAL_UDF @@ -517,9 +496,6 @@ PREHOOK: query: EXPLAIN SELECT key - '1.0' FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key - '1.0' FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (- (TOK_TABLE_OR_COL key) '1.0'))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -530,11 +506,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key - '1.0') - type: double + expressions: (key - '1.0') (type: double) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key - '1.0' FROM DECIMAL_UDF @@ -589,9 +565,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- multiplication EXPLAIN SELECT key * key FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (* (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -602,11 +575,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key * key) - type: decimal(38,20) + expressions: (key * key) (type: decimal(38,20)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key * key FROM DECIMAL_UDF @@ -659,9 +632,6 @@ PREHOOK: query: EXPLAIN SELECT key * value FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key * value FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (* (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -672,11 +642,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key * value) - type: decimal(31,10) + expressions: (key * value) (type: decimal(31,10)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key * value FROM DECIMAL_UDF @@ -729,9 +699,6 @@ PREHOOK: query: EXPLAIN SELECT key * (value/2) FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key * (value/2) FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (* (TOK_TABLE_OR_COL key) (/ (TOK_TABLE_OR_COL value) 2)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -742,11 +709,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key * (value / 2)) - type: decimal(37,16) + expressions: (key * (value / 2)) (type: decimal(37,16)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key * (value/2) FROM DECIMAL_UDF @@ -799,9 +766,6 @@ PREHOOK: query: EXPLAIN SELECT key * '2.0' FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key * '2.0' FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (* (TOK_TABLE_OR_COL key) '2.0'))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -812,11 +776,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key * '2.0') - type: double + expressions: (key * '2.0') (type: double) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key * '2.0' FROM DECIMAL_UDF @@ -871,9 +835,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- division EXPLAIN SELECT key / 0 FROM DECIMAL_UDF limit 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (/ (TOK_TABLE_OR_COL key) 0))) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -884,12 +845,14 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key / 0) - type: decimal(22,12) + expressions: (key / 0) (type: decimal(22,12)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key / 0 FROM DECIMAL_UDF limit 1 @@ -905,9 +868,6 @@ PREHOOK: query: EXPLAIN SELECT key / NULL FROM DECIMAL_UDF limit 1 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key / NULL FROM DECIMAL_UDF limit 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (/ (TOK_TABLE_OR_COL key) TOK_NULL))) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -918,12 +878,14 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key / null) - type: double + expressions: (key / null) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key / NULL FROM DECIMAL_UDF limit 1 @@ -939,9 +901,6 @@ PREHOOK: query: EXPLAIN SELECT key / key FROM DECIMAL_UDF WHERE key is not null PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (/ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)))) (TOK_WHERE (and (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL key)) (<> (TOK_TABLE_OR_COL key) 0))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -952,15 +911,14 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key is not null and (key <> 0)) - type: boolean + predicate: (key is not null and (key <> 0)) (type: boolean) + Statistics: Num rows: 2 Data size: 239 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key / key) - type: decimal(38,24) + expressions: (key / key) (type: decimal(38,24)) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 239 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key / key FROM DECIMAL_UDF WHERE key is not null and key <> 0 @@ -1009,9 +967,6 @@ PREHOOK: query: EXPLAIN SELECT key / value FROM DECIMAL_UDF WHERE value is not n PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (/ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) (TOK_WHERE (and (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL value)) (<> (TOK_TABLE_OR_COL value) 0))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1022,15 +977,14 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (value is not null and (value <> 0)) - type: boolean + predicate: (value is not null and (value <> 0)) (type: boolean) + Statistics: Num rows: 2 Data size: 239 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key / value) - type: decimal(31,21) + expressions: (key / value) (type: decimal(31,21)) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 239 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key / value FROM DECIMAL_UDF WHERE value is not null and value <> 0 @@ -1069,9 +1023,6 @@ PREHOOK: query: EXPLAIN SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (/ (TOK_TABLE_OR_COL key) (/ (TOK_TABLE_OR_COL value) 2)))) (TOK_WHERE (and (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL value)) (<> (TOK_TABLE_OR_COL value) 0))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1082,15 +1033,14 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (value is not null and (value <> 0)) - type: boolean + predicate: (value is not null and (value <> 0)) (type: boolean) + Statistics: Num rows: 2 Data size: 239 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key / (value / 2)) - type: decimal(38,24) + expressions: (key / (value / 2)) (type: decimal(38,24)) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 239 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key / (value/2) FROM DECIMAL_UDF WHERE value is not null and value <> 0 @@ -1129,9 +1079,6 @@ PREHOOK: query: EXPLAIN SELECT key / '2.0' FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key / '2.0' FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (/ (TOK_TABLE_OR_COL key) '2.0'))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1142,11 +1089,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key / '2.0') - type: double + expressions: (key / '2.0') (type: double) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key / '2.0' FROM DECIMAL_UDF @@ -1201,9 +1148,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- abs EXPLAIN SELECT abs(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION abs (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1214,11 +1158,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: abs(key) - type: decimal(38,18) + expressions: abs(key) (type: decimal(38,18)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT abs(key) FROM DECIMAL_UDF @@ -1273,9 +1217,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- avg EXPLAIN SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (/ (TOK_FUNCTION sum (TOK_TABLE_OR_COL key)) (TOK_FUNCTION count (TOK_TABLE_OR_COL key)))) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL value)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1284,70 +1225,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - decimal_udf + Map Operator Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: int - expr: key - type: decimal(20,10) + expressions: value (type: int), key (type: decimal(20,10)) outputColumnNames: value, key + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(key) - expr: count(key) - expr: avg(key) - bucketGroup: false - keys: - expr: value - type: int + aggregations: sum(key), count(key), avg(key) + keys: value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: decimal(30,10) - expr: _col2 - type: bigint - expr: _col3 - type: struct + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint), _col3 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: count(VALUE._col1) - expr: avg(VALUE._col2) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: (_col1 / _col2) - type: decimal(38,23) - expr: _col3 - type: decimal(24,14) - expr: _col1 - type: decimal(30,10) + expressions: _col0 (type: int), (_col1 / _col2) (type: decimal(38,23)), _col3 (type: decimal(24,14)), _col1 (type: decimal(30,10)) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1355,29 +1265,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: decimal(38,23) - expr: _col2 - type: decimal(24,14) - expr: _col3 - type: decimal(30,10) + Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: decimal(38,23)), _col2 (type: decimal(24,14)), _col3 (type: decimal(30,10)) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1418,9 +1318,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- negative EXPLAIN SELECT -key FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (- (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1431,11 +1328,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (- key) - type: decimal(20,10) + expressions: (- key) (type: decimal(20,10)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT -key FROM DECIMAL_UDF @@ -1490,9 +1387,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- positive EXPLAIN SELECT +key FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1503,11 +1397,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: decimal(20,10) + expressions: key (type: decimal(20,10)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT +key FROM DECIMAL_UDF @@ -1562,9 +1456,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- ceiling EXPlAIN SELECT CEIL(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION CEIL (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1575,11 +1466,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: ceil(key) - type: decimal(11,0) + expressions: ceil(key) (type: decimal(11,0)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT CEIL(key) FROM DECIMAL_UDF @@ -1634,9 +1525,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- floor EXPLAIN SELECT FLOOR(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION FLOOR (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1647,11 +1535,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: floor(key) - type: decimal(11,0) + expressions: floor(key) (type: decimal(11,0)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT FLOOR(key) FROM DECIMAL_UDF @@ -1706,9 +1594,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- round EXPLAIN SELECT ROUND(key, 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION ROUND (TOK_TABLE_OR_COL key) 2))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1719,11 +1604,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: round(key, 2) - type: decimal(13,2) + expressions: round(key, 2) (type: decimal(13,2)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT ROUND(key, 2) FROM DECIMAL_UDF @@ -1778,9 +1663,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- power EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION POWER (TOK_TABLE_OR_COL key) 2))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1791,11 +1673,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: power(key, 2) - type: double + expressions: power(key, 2) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT POWER(key, 2) FROM DECIMAL_UDF @@ -1850,9 +1732,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- modulo EXPLAIN SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (% (+ (TOK_TABLE_OR_COL key) 1) (/ (TOK_TABLE_OR_COL key) 2)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1863,11 +1742,11 @@ STAGE PLANS: Processor Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: ((key + 1) % (key / 2)) - type: decimal(22,12) + expressions: ((key + 1) % (key / 2)) (type: decimal(22,12)) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF @@ -1922,9 +1801,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- stddev, var EXPLAIN SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION stddev (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION variance (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1932,64 +1808,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - decimal_udf + Map Operator Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: int - expr: key - type: decimal(20,10) + expressions: value (type: int), key (type: decimal(20,10)) outputColumnNames: value, key + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: stddev(key) - expr: variance(key) - bucketGroup: false - keys: - expr: value - type: int + aggregations: stddev(key), variance(key) + keys: value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: struct - expr: _col2 - type: struct + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: stddev(VALUE._col0) - expr: variance(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: stddev(VALUE._col0), variance(VALUE._col1) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: double - expr: _col2 - type: double + expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2030,9 +1882,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- stddev_samp, var_samp EXPLAIN SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION stddev_samp (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION var_samp (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2040,64 +1889,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - decimal_udf + Map Operator Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: int - expr: key - type: decimal(20,10) + expressions: value (type: int), key (type: decimal(20,10)) outputColumnNames: value, key + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: stddev_samp(key) - expr: var_samp(key) - bucketGroup: false - keys: - expr: value - type: int + aggregations: stddev_samp(key), var_samp(key) + keys: value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: struct - expr: _col2 - type: struct + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: stddev_samp(VALUE._col0) - expr: var_samp(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: stddev_samp(VALUE._col0), var_samp(VALUE._col1) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: double - expr: _col2 - type: double + expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2138,9 +1963,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- histogram EXPLAIN SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION histogram_numeric (TOK_TABLE_OR_COL key) 3))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2148,42 +1970,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - decimal_udf + Map Operator Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: decimal(20,10) + expressions: key (type: decimal(20,10)) outputColumnNames: key + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: histogram_numeric(key, 3) - bucketGroup: false + aggregations: histogram_numeric(key, 3) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: array + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: array) Reduce Operator Tree: Group By Operator - aggregations: - expr: histogram_numeric(VALUE._col0) - bucketGroup: false + aggregations: histogram_numeric(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: _col0 - type: array> + expressions: _col0 (type: array>) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2208,9 +2024,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- min EXPLAIN SELECT MIN(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION MIN (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2218,42 +2031,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - decimal_udf + Map Operator Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: decimal(20,10) + expressions: key (type: decimal(20,10)) outputColumnNames: key + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: min(key) - bucketGroup: false + aggregations: min(key) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: decimal(20,10) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(20,10)) Reduce Operator Tree: Group By Operator - aggregations: - expr: min(VALUE._col0) - bucketGroup: false + aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: decimal(20,10) + expressions: _col0 (type: decimal(20,10)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2278,9 +2085,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- max EXPLAIN SELECT MAX(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION MAX (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2288,42 +2092,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - decimal_udf + Map Operator Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: decimal(20,10) + expressions: key (type: decimal(20,10)) outputColumnNames: key + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: max(key) - bucketGroup: false + aggregations: max(key) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: decimal(20,10) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(20,10)) Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - bucketGroup: false + aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: decimal(20,10) + expressions: _col0 (type: decimal(20,10)) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2348,9 +2146,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- count EXPLAIN SELECT COUNT(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2358,42 +2153,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - decimal_udf + Map Operator Tree: TableScan alias: decimal_udf + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: decimal(20,10) + expressions: key (type: decimal(20,10)) outputColumnNames: key + Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(key) - bucketGroup: false + aggregations: count(key) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/disable_merge_for_bucketing.q.out ql/src/test/results/clientpositive/disable_merge_for_bucketing.q.out index b6ab31e..7265313 100644 --- ql/src/test/results/clientpositive/disable_merge_for_bucketing.q.out +++ ql/src/test/results/clientpositive/disable_merge_for_bucketing.q.out @@ -12,7 +12,21 @@ insert overwrite table bucket2_1 select * from src POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucket2_1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucket2_1 + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -22,35 +36,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: UDFToInteger(_col0) - type: int - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + value expressions: _col0 (type: string), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -101,24 +101,17 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 2 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -186,9 +179,6 @@ select * from bucket2_1 tablesample (bucket 1 out of 2) s order by key POSTHOOK: type: QUERY POSTHOOK: Lineage: bucket2_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bucket2_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME bucket2_1) (TOK_TABLEBUCKETSAMPLE 1 2) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -196,37 +186,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((hash(key) & 2147483647) % 2) = 0) - type: boolean + predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/drop_multi_partitions.q.out ql/src/test/results/clientpositive/drop_multi_partitions.q.out index 31cd197..00d4b6d 100644 --- ql/src/test/results/clientpositive/drop_multi_partitions.q.out +++ ql/src/test/results/clientpositive/drop_multi_partitions.q.out @@ -36,7 +36,15 @@ PREHOOK: type: ALTERTABLE_DROPPARTS POSTHOOK: query: explain extended alter table mp drop partition (b='1') POSTHOOK: type: ALTERTABLE_DROPPARTS ABSTRACT SYNTAX TREE: - (TOK_ALTERTABLE_DROPPARTS mp (TOK_PARTSPEC (TOK_PARTVAL b = '1'))) + +TOK_ALTERTABLE_DROPPARTS + mp + TOK_PARTSPEC + TOK_PARTVAL + b + = + '1' + STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/drop_udf.q.out ql/src/test/results/clientpositive/drop_udf.q.out index b7de39b..422933a 100644 --- ql/src/test/results/clientpositive/drop_udf.q.out +++ ql/src/test/results/clientpositive/drop_udf.q.out @@ -8,9 +8,6 @@ PREHOOK: type: DROPFUNCTION POSTHOOK: query: EXPLAIN DROP TEMPORARY FUNCTION test_translate POSTHOOK: type: DROPFUNCTION -ABSTRACT SYNTAX TREE: - (TOK_DROPFUNCTION test_translate) - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out index 50080b8..545d527 100644 --- ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out +++ ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out @@ -54,7 +54,31 @@ POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=__HIVE_DEFAU POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=1).intcol SIMPLE [] POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=__HIVE_DEFAULT_PARTITION__).intcol SIMPLE [] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dynamic_part_table))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL intcol))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL partcol1) '1') (= (TOK_TABLE_OR_COL partcol2) '1'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + dynamic_part_table + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + intcol + TOK_WHERE + and + = + TOK_TABLE_OR_COL + partcol1 + '1' + = + TOK_TABLE_OR_COL + partcol2 + '1' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -63,27 +87,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - dynamic_part_table + Map Operator Tree: TableScan alias: dynamic_part_table - Statistics: - numRows: 1 dataSize: 1 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: intcol - type: string + expressions: intcol (type: string) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 1 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 1 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -160,7 +178,31 @@ POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=__HIVE_DEFAU POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=1).intcol SIMPLE [] POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=__HIVE_DEFAULT_PARTITION__).intcol SIMPLE [] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dynamic_part_table))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL intcol))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL partcol1) '1') (= (TOK_TABLE_OR_COL partcol2) '1'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + dynamic_part_table + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + intcol + TOK_WHERE + and + = + TOK_TABLE_OR_COL + partcol1 + '1' + = + TOK_TABLE_OR_COL + partcol2 + '1' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -169,27 +211,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - dynamic_part_table + Map Operator Tree: TableScan alias: dynamic_part_table - Statistics: - numRows: 1 dataSize: 1 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: intcol - type: string + expressions: intcol (type: string) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 1 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 1 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -266,7 +302,41 @@ POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=1,partcol2=__HIVE_DEFAU POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=1).intcol SIMPLE [] POSTHOOK: Lineage: dynamic_part_table PARTITION(partcol1=__HIVE_DEFAULT_PARTITION__,partcol2=__HIVE_DEFAULT_PARTITION__).intcol SIMPLE [] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dynamic_part_table))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL intcol))) (TOK_WHERE (or (and (= (TOK_TABLE_OR_COL partcol1) '1') (= (TOK_TABLE_OR_COL partcol2) '1')) (and (= (TOK_TABLE_OR_COL partcol1) '1') (= (TOK_TABLE_OR_COL partcol2) '__HIVE_DEFAULT_PARTITION__')))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + dynamic_part_table + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + intcol + TOK_WHERE + or + and + = + TOK_TABLE_OR_COL + partcol1 + '1' + = + TOK_TABLE_OR_COL + partcol2 + '1' + and + = + TOK_TABLE_OR_COL + partcol1 + '1' + = + TOK_TABLE_OR_COL + partcol2 + '__HIVE_DEFAULT_PARTITION__' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -275,27 +345,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - dynamic_part_table + Map Operator Tree: TableScan alias: dynamic_part_table - Statistics: - numRows: 2 dataSize: 2 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: intcol - type: string + expressions: intcol (type: string) outputColumnNames: _col0 - Statistics: - numRows: 2 dataSize: 2 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2 dataSize: 2 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/escape_clusterby1.q.out ql/src/test/results/clientpositive/escape_clusterby1.q.out index 6ecb97e..e608c12 100644 --- ql/src/test/results/clientpositive/escape_clusterby1.q.out +++ ql/src/test/results/clientpositive/escape_clusterby1.q.out @@ -6,9 +6,6 @@ POSTHOOK: query: -- escaped column names in cluster by are not working jira 3267 explain select key, value from src cluster by key, value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -16,40 +13,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -65,9 +48,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select `key`, value from src cluster by `key`, value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -75,40 +55,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/escape_distributeby1.q.out ql/src/test/results/clientpositive/escape_distributeby1.q.out index 0f04826..7c54485 100644 --- ql/src/test/results/clientpositive/escape_distributeby1.q.out +++ ql/src/test/results/clientpositive/escape_distributeby1.q.out @@ -6,9 +6,6 @@ POSTHOOK: query: -- escaped column names in distribute by by are not working jir explain select key, value from src distribute by key, value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -16,35 +13,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -60,9 +47,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select `key`, value from src distribute by `key`, value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -70,35 +54,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/escape_orderby1.q.out ql/src/test/results/clientpositive/escape_orderby1.q.out index 59c637c..ec23738 100644 --- ql/src/test/results/clientpositive/escape_orderby1.q.out +++ ql/src/test/results/clientpositive/escape_orderby1.q.out @@ -6,9 +6,6 @@ POSTHOOK: query: -- escaped column names in order by are not working jira 3267 explain select key, value from src order by key, value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -16,35 +13,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -60,9 +47,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select `key`, value from src order by `key`, value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -70,35 +54,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/escape_sortby1.q.out ql/src/test/results/clientpositive/escape_sortby1.q.out index 5d5f29d..4533adf 100644 --- ql/src/test/results/clientpositive/escape_sortby1.q.out +++ ql/src/test/results/clientpositive/escape_sortby1.q.out @@ -6,9 +6,6 @@ POSTHOOK: query: -- escaped column names in sort by are not working jira 3267 explain select key, value from src sort by key, value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -16,35 +13,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -60,9 +47,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select `key`, value from src sort by `key`, value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -70,35 +54,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/explain_logical.q.out ql/src/test/results/clientpositive/explain_logical.q.out index 437844c..63f2050 100644 --- ql/src/test/results/clientpositive/explain_logical.q.out +++ ql/src/test/results/clientpositive/explain_logical.q.out @@ -46,55 +46,69 @@ POSTHOOK: query: -- Simple select queries, union queries and join queries EXPLAIN LOGICAL SELECT key, count(1) FROM srcpart WHERE ds IS NOT NULL GROUP BY key POSTHOOK: type: QUERY -LOGICAL PLAN +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_WHERE + TOK_FUNCTION + TOK_ISNOTNULL + TOK_TABLE_OR_COL + ds + TOK_GROUPBY + TOK_TABLE_OR_COL + key + + +LOGICAL PLAN: srcpart TableScan (TS_0) alias: srcpart + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_2) - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Group By Operator (GBY_3) - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_4) - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Group By Operator (GBY_5) - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_6) - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE File Output Operator (FS_7) compressed: false - GlobalTableId: 0 + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -106,55 +120,79 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN LOGICAL SELECT key, count(1) FROM (SELECT key, value FROM src) subq1 GROUP BY key POSTHOOK: type: QUERY -LOGICAL PLAN +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + subq1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + + +LOGICAL PLAN: subq1:src TableScan (TS_0) alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_1) - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator (GBY_3) - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_4) - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Group By Operator (GBY_5) - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_6) - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator (FS_7) compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -170,28 +208,77 @@ POSTHOOK: query: EXPLAIN LOGICAL SELECT key, value FROM src UNION ALL SELECT key, value FROM srcpart WHERE ds IS NOT NULL ) S1 POSTHOOK: type: QUERY -LOGICAL PLAN +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_UNION + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + TOK_FUNCTION + TOK_ISNOTNULL + TOK_TABLE_OR_COL + ds + S1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +LOGICAL PLAN: null-subquery1:s1-subquery1:src TableScan (TS_0) alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_1) - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union (UNION_5) + Statistics: Num rows: 145 Data size: 29060 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_6) - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 145 Data size: 29060 Basic stats: COMPLETE Column stats: NONE File Output Operator (FS_7) compressed: false - GlobalTableId: 0 + Statistics: Num rows: 145 Data size: 29060 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -199,14 +286,13 @@ null-subquery1:s1-subquery1:src null-subquery2:s1-subquery2:srcpart TableScan (TS_2) alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_4) - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Union (UNION_5) + Statistics: Num rows: 145 Data size: 29060 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: EXPLAIN LOGICAL SELECT S1.key, S2.value FROM src S1 JOIN srcpart S2 ON S1.key = S2.key WHERE ds IS NOT NULL @@ -214,40 +300,76 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN LOGICAL SELECT S1.key, S2.value FROM src S1 JOIN srcpart S2 ON S1.key = S2.key WHERE ds IS NOT NULL POSTHOOK: type: QUERY -LOGICAL PLAN +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src + S1 + TOK_TABREF + TOK_TABNAME + srcpart + S2 + = + . + TOK_TABLE_OR_COL + S1 + key + . + TOK_TABLE_OR_COL + S2 + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + S1 + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + S2 + value + TOK_WHERE + TOK_FUNCTION + TOK_ISNOTNULL + TOK_TABLE_OR_COL + ds + + +LOGICAL PLAN: s1 TableScan (TS_1) alias: s1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_2) - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Join Operator (JOIN_4) condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col5 + Statistics: Num rows: 127 Data size: 25572 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_6) - expressions: - expr: _col0 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 127 Data size: 25572 Basic stats: COMPLETE Column stats: NONE File Output Operator (FS_7) compressed: false - GlobalTableId: 0 + Statistics: Num rows: 127 Data size: 25572 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -255,26 +377,21 @@ s1 s2 TableScan (TS_0) alias: s2 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_3) - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Join Operator (JOIN_4) condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col5 + Statistics: Num rows: 127 Data size: 25572 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: -- With views EXPLAIN LOGICAL SELECT * FROM V1 @@ -282,20 +399,34 @@ PREHOOK: type: QUERY POSTHOOK: query: -- With views EXPLAIN LOGICAL SELECT * FROM V1 POSTHOOK: type: QUERY -LOGICAL PLAN +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + V1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +LOGICAL PLAN: v1:src TableScan (TS_0) alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_1) - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator (FS_3) compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -305,22 +436,34 @@ PREHOOK: query: EXPLAIN LOGICAL SELECT * FROM V2 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN LOGICAL SELECT * FROM V2 POSTHOOK: type: QUERY -LOGICAL PLAN +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + V2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +LOGICAL PLAN: v2:srcpart TableScan (TS_0) alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_2) - expressions: - expr: ds - type: string - expr: key - type: string - expr: value - type: string + expressions: ds (type: string), key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE File Output Operator (FS_4) compressed: false - GlobalTableId: 0 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -330,45 +473,52 @@ PREHOOK: query: EXPLAIN LOGICAL SELECT * FROM V3 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN LOGICAL SELECT * FROM V3 POSTHOOK: type: QUERY -LOGICAL PLAN +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + V3 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +LOGICAL PLAN: v3:src1:srcpart TableScan (TS_0) alias: srcpart + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_2) - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col1 + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_4) - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col1 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Join Operator (JOIN_6) condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col1, _col4 + Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_8) - expressions: - expr: _col1 - type: string - expr: _col4 - type: string + expressions: _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE File Output Operator (FS_10) compressed: false - GlobalTableId: 0 + Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -376,52 +526,57 @@ v3:src1:srcpart v3:src2 TableScan (TS_3) alias: src2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_5) - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Join Operator (JOIN_6) condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col1, _col4 + Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: EXPLAIN LOGICAL SELECT * FROM V4 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN LOGICAL SELECT * FROM V4 POSTHOOK: type: QUERY -LOGICAL PLAN +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + V4 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +LOGICAL PLAN: v4:src1:src TableScan (TS_3) alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_4) - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_6) - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Join Operator (JOIN_9) condition map: Inner Join 0 to 1 @@ -430,20 +585,15 @@ v4:src1:src 0 {VALUE._col0} 1 {VALUE._col2} 2 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col4, _col6 + Statistics: Num rows: 255 Data size: 51145 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_10) - expressions: - expr: _col0 - type: string - expr: _col4 - type: string - expr: _col6 - type: string + expressions: _col0 (type: string), _col4 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 255 Data size: 51145 Basic stats: COMPLETE Column stats: NONE File Output Operator (FS_12) compressed: false - GlobalTableId: 0 + Statistics: Num rows: 255 Data size: 51145 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -451,25 +601,17 @@ v4:src1:src v4:src2:srcpart TableScan (TS_0) alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_2) - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col1, _col2 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_7) - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 1 - value expressions: - expr: _col2 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) Join Operator (JOIN_9) condition map: Inner Join 0 to 1 @@ -478,23 +620,18 @@ v4:src2:srcpart 0 {VALUE._col0} 1 {VALUE._col2} 2 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col4, _col6 + Statistics: Num rows: 255 Data size: 51145 Basic stats: COMPLETE Column stats: NONE v4:src3 TableScan (TS_5) alias: src3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_8) - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Join Operator (JOIN_9) condition map: Inner Join 0 to 1 @@ -503,8 +640,8 @@ v4:src3 0 {VALUE._col0} 1 {VALUE._col2} 2 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col4, _col6 + Statistics: Num rows: 255 Data size: 51145 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: -- The table should show up in the explain logical even if none -- of the partitions are selected. @@ -519,28 +656,37 @@ PREHOOK: query: EXPLAIN LOGICAL SELECT * FROM V5 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN LOGICAL SELECT * FROM V5 POSTHOOK: type: QUERY -LOGICAL PLAN +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + V5 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +LOGICAL PLAN: v5:srcpart TableScan (TS_0) alias: srcpart + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator (FIL_5) - predicate: - expr: (ds = '10') - type: boolean + predicate: (ds = '10') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator (SEL_2) - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator (FS_4) compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -550,102 +696,135 @@ PREHOOK: query: EXPLAIN LOGICAL SELECT s1.key, s1.cnt, s2.value FROM (SELECT key PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN LOGICAL SELECT s1.key, s1.cnt, s2.value FROM (SELECT key, count(value) as cnt FROM src GROUP BY key) s1 JOIN src s2 ON (s1.key = s2.key) ORDER BY s1.key POSTHOOK: type: QUERY -LOGICAL PLAN +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + TOK_TABLE_OR_COL + value + cnt + TOK_GROUPBY + TOK_TABLE_OR_COL + key + s1 + TOK_TABREF + TOK_TABNAME + src + s2 + = + . + TOK_TABLE_OR_COL + s1 + key + . + TOK_TABLE_OR_COL + s2 + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + s1 + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + s1 + cnt + TOK_SELEXPR + . + TOK_TABLE_OR_COL + s2 + value + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + s1 + key + + +LOGICAL PLAN: s1:src TableScan (TS_0) alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_1) - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator (GBY_2) - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_3) - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Group By Operator (GBY_4) - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_5) - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_7) - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Join Operator (JOIN_9) condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_10) - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_11) - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) Extract (EX_12) + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator (FS_13) compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -653,24 +832,19 @@ s1:src s2 TableScan (TS_6) alias: s2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_8) - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Join Operator (JOIN_9) condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/explain_rearrange.q.out ql/src/test/results/clientpositive/explain_rearrange.q.out index cac642d..de61b78 100644 --- ql/src/test/results/clientpositive/explain_rearrange.q.out +++ ql/src/test/results/clientpositive/explain_rearrange.q.out @@ -52,9 +52,6 @@ join on src1.key = src2.key order by src1.key, src1.cnt1, src2.cnt1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) cnt1))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) cnt1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage [MAPRED] Stage-9 depends on stages: Stage-1, Stage-5 , consists of Stage-7, Stage-8, Stage-3 [CONDITIONAL] @@ -68,68 +65,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -140,8 +113,7 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -149,24 +121,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -183,31 +146,16 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint + key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -215,8 +163,7 @@ STAGE PLANS: Stage: Stage-8 Map Reduce - Alias -> Map Operator Tree: - $INTNAME1 + Map Operator Tree: TableScan Map Join Operator condition map: @@ -224,24 +171,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -258,37 +196,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -296,20 +216,12 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -317,68 +229,44 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - src2:subq2:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -428,9 +316,6 @@ join on src1.key = src2.key order by src1.key, src1.cnt1, src2.cnt1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) cnt1))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) cnt1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage [MAPRED] Stage-2 depends on stages: Stage-1, Stage-7 , consists of Stage-3, Stage-5, Stage-6 [CONDITIONAL] @@ -444,68 +329,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -516,8 +377,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -525,24 +385,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -559,31 +410,16 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint + key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -591,8 +427,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - $INTNAME1 + Map Operator Tree: TableScan Map Join Operator condition map: @@ -600,24 +435,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -634,37 +460,19 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -672,20 +480,12 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -693,68 +493,44 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: - src2:subq2:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -804,9 +580,6 @@ join on src1.key = src2.key order by src1.key, src1.cnt1, src2.cnt1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) cnt1))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) cnt1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage [MAPRED] Stage-2 is a root stage [MAPRED] @@ -820,68 +593,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -889,68 +638,44 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src2:subq2:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -961,8 +686,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -970,24 +694,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1004,8 +719,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - $INTNAME1 + Map Operator Tree: TableScan Map Join Operator condition map: @@ -1013,24 +727,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1047,37 +752,19 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -1085,20 +772,12 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1106,31 +785,16 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint + key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1180,9 +844,6 @@ join on src1.key = src2.key order by src1.key, src1.cnt1, src2.cnt1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) cnt1))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) cnt1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage [MAPRED] Stage-2 is a root stage [MAPRED] @@ -1196,68 +857,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1265,68 +902,44 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src2:subq2:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1337,8 +950,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -1346,24 +958,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1380,8 +983,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - $INTNAME1 + Map Operator Tree: TableScan Map Join Operator condition map: @@ -1389,24 +991,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col1} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1423,37 +1016,19 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -1461,20 +1036,12 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1482,31 +1049,16 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint + key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/fetch_aggregation.q.out ql/src/test/results/clientpositive/fetch_aggregation.q.out index 0e4c204..9543dfd 100644 --- ql/src/test/results/clientpositive/fetch_aggregation.q.out +++ ql/src/test/results/clientpositive/fetch_aggregation.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(key),sum(key),avg(key),min(key),max(key),std(key),variance(key) from src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION std (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION variance (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,30 +11,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(key) - expr: sum(key) - expr: avg(key) - expr: min(key) - expr: max(key) - expr: std(key) - expr: variance(key) - bucketGroup: false + aggregations: count(key), sum(key), avg(key), min(key), max(key), std(key), variance(key) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -48,34 +36,14 @@ STAGE PLANS: limit: -1 Processor Tree: Group By Operator - aggregations: - expr: count(_col0) - expr: sum(_col1) - expr: avg(_col2) - expr: min(_col3) - expr: max(_col4) - expr: std(_col5) - expr: variance(_col6) - bucketGroup: false + aggregations: count(_col0), sum(_col1), avg(_col2), min(_col3), max(_col4), std(_col5), variance(_col6) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: double - expr: _col2 - type: double - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: double - expr: _col6 - type: double + expressions: _col0 (type: bigint), _col1 (type: double), _col2 (type: double), _col3 (type: string), _col4 (type: string), _col5 (type: double), _col6 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select count(key),sum(key),avg(key),min(key),max(key),std(key),variance(key) from src diff --git ql/src/test/results/clientpositive/fileformat_sequencefile.q.out ql/src/test/results/clientpositive/fileformat_sequencefile.q.out index f827c36..71034bd 100644 --- ql/src/test/results/clientpositive/fileformat_sequencefile.q.out +++ ql/src/test/results/clientpositive/fileformat_sequencefile.q.out @@ -8,9 +8,6 @@ CREATE TABLE dest1(key INT, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.SequenceFileInputFormat' OUTPUTFORMAT 'org.apache.hadoop.mapred.SequenceFileOutputFormat' POSTHOOK: type: CREATETABLE -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME dest1) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL key TOK_INT) (TOK_TABCOL value TOK_STRING)) (TOK_TABLEFILEFORMAT 'org.apache.hadoop.mapred.SequenceFileInputFormat' 'org.apache.hadoop.mapred.SequenceFileOutputFormat')) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -19,12 +16,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: key int, value string - if not exists: false input format: org.apache.hadoop.mapred.SequenceFileInputFormat - # buckets: -1 output format: org.apache.hadoop.mapred.SequenceFileOutputFormat name: dest1 - isExternal: false PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.SequenceFileInputFormat' diff --git ql/src/test/results/clientpositive/fileformat_text.q.out ql/src/test/results/clientpositive/fileformat_text.q.out index 3da947d..005abbe 100644 --- ql/src/test/results/clientpositive/fileformat_text.q.out +++ ql/src/test/results/clientpositive/fileformat_text.q.out @@ -8,9 +8,6 @@ CREATE TABLE dest1(key INT, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat' POSTHOOK: type: CREATETABLE -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME dest1) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL key TOK_INT) (TOK_TABCOL value TOK_STRING)) (TOK_TABLEFILEFORMAT 'org.apache.hadoop.mapred.TextInputFormat' 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat')) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -19,12 +16,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: key int, value string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: dest1 - isExternal: false PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' diff --git ql/src/test/results/clientpositive/filter_join_breaktask.q.out ql/src/test/results/clientpositive/filter_join_breaktask.q.out index 612c2b1..ff91d35 100644 --- ql/src/test/results/clientpositive/filter_join_breaktask.q.out +++ ql/src/test/results/clientpositive/filter_join_breaktask.q.out @@ -28,7 +28,106 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: filter_join_breaktask PARTITION(ds=2008-04-08).key EXPRESSION [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: filter_join_breaktask PARTITION(ds=2008-04-08).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME filter_join_breaktask) f) (TOK_TABREF (TOK_TABNAME filter_join_breaktask) m) (AND (AND (AND (= (. (TOK_TABLE_OR_COL f) key) (. (TOK_TABLE_OR_COL m) key)) (= (. (TOK_TABLE_OR_COL f) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL m) ds) '2008-04-08')) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL f) key)))) (TOK_TABREF (TOK_TABNAME filter_join_breaktask) g) (AND (AND (AND (AND (= (. (TOK_TABLE_OR_COL g) value) (. (TOK_TABLE_OR_COL m) value)) (= (. (TOK_TABLE_OR_COL g) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL m) ds) '2008-04-08')) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL m) value))) (!= (. (TOK_TABLE_OR_COL m) value) '')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL f) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL g) value))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + filter_join_breaktask + f + TOK_TABREF + TOK_TABNAME + filter_join_breaktask + m + AND + AND + AND + = + . + TOK_TABLE_OR_COL + f + key + . + TOK_TABLE_OR_COL + m + key + = + . + TOK_TABLE_OR_COL + f + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + m + ds + '2008-04-08' + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + f + key + TOK_TABREF + TOK_TABNAME + filter_join_breaktask + g + AND + AND + AND + AND + = + . + TOK_TABLE_OR_COL + g + value + . + TOK_TABLE_OR_COL + m + value + = + . + TOK_TABLE_OR_COL + g + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + m + ds + '2008-04-08' + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + m + value + != + . + TOK_TABLE_OR_COL + m + value + '' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + f + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + g + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -38,61 +137,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - f + Map Operator Tree: TableScan alias: f - Statistics: - numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: key is not null - type: boolean - Statistics: - numRows: 13 dataSize: 109 basicStatsState: COMPLETE colStatsState: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 109 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - Statistics: - numRows: 13 dataSize: 109 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 109 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: - expr: key - type: int - m + value expressions: key (type: int) TableScan alias: m - Statistics: - numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key is not null and value is not null) and (value <> '')) - type: boolean - Statistics: - numRows: 7 dataSize: 59 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key is not null and value is not null) and (value <> '')) (type: boolean) + Statistics: Num rows: 7 Data size: 59 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - Statistics: - numRows: 7 dataSize: 59 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 59 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: - expr: value - type: string + value expressions: value (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -147,10 +222,8 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col6 - Statistics: - numRows: 14 dataSize: 119 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 14 Data size: 119 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -171,51 +244,31 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: - expr: _col6 - type: string + key expressions: _col6 (type: string) sort order: + - Map-reduce partition columns: - expr: _col6 - type: string - Statistics: - numRows: 14 dataSize: 119 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 14 Data size: 119 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: - expr: _col0 - type: int - g + value expressions: _col0 (type: int) TableScan alias: g - Statistics: - numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (value <> '') - type: boolean - Statistics: - numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE + predicate: (value <> '') (type: boolean) + Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: value (type: string) sort order: + - Map-reduce partition columns: - expr: value - type: string - Statistics: - numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: - expr: value - type: string + value expressions: value (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -291,26 +344,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col11 - Statistics: - numRows: 27 dataSize: 232 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col11 - type: string + expressions: _col0 (type: int), _col11 (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 27 dataSize: 232 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 27 dataSize: 232 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/filter_numeric.q.out ql/src/test/results/clientpositive/filter_numeric.q.out index bef27a2..d1fe77f 100644 --- ql/src/test/results/clientpositive/filter_numeric.q.out +++ ql/src/test/results/clientpositive/filter_numeric.q.out @@ -28,9 +28,6 @@ POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)s POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME partint))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (< (TOK_TABLE_OR_COL hr) 11)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -38,30 +35,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - partint - TableScan - alias: partint - Filter Operator - predicate: - expr: (hr < 11) - type: boolean - Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: hr - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -87,9 +60,6 @@ POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)s POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME partint))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (and (<= (TOK_TABLE_OR_COL hr) 12) (> (TOK_TABLE_OR_COL hr) 11))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -97,22 +67,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - partint + Map Operator Tree: TableScan alias: partint + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -644,9 +609,6 @@ POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)s POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME partint))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (TOK_FUNCTION between KW_FALSE (TOK_TABLE_OR_COL hr) 11 12)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -654,22 +616,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - partint + Map Operator Tree: TableScan alias: partint + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1703,9 +1660,6 @@ POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)s POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME partint))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (TOK_FUNCTION between KW_TRUE (TOK_TABLE_OR_COL hr) 12 14)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1713,22 +1667,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - partint + Map Operator Tree: TableScan alias: partint + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2260,9 +2209,6 @@ POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)s POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: partint PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME partint))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (< (TOK_TABLE_OR_COL hr) 13)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2270,22 +2216,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - partint + Map Operator Tree: TableScan alias: partint + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby1.q.out ql/src/test/results/clientpositive/groupby1.q.out index 4288585..49786a7 100644 --- ql/src/test/results/clientpositive/groupby1.q.out +++ ql/src/test/results/clientpositive/groupby1.q.out @@ -9,9 +9,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -21,42 +18,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: substr(value, 5) - type: string + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: substr(value, 5) (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -64,41 +48,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby10.q.out ql/src/test/results/clientpositive/groupby10.q.out index f766ed7..341427f 100644 --- ql/src/test/results/clientpositive/groupby10.q.out +++ ql/src/test/results/clientpositive/groupby10.q.out @@ -29,9 +29,6 @@ FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME INPUT))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT) key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL INPUT) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL INPUT) key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -44,54 +41,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - input + Map Operator Tree: TableScan alias: input + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5) - type: string + key expressions: substr(value, 5) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(value, 5) - type: string - tag: -1 - value expressions: - expr: key - type: int + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(KEY._col0) - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: int + aggregations: count(KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: sum(KEY._col0) - expr: sum(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: int + aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -99,46 +81,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: count(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -160,46 +124,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: double - expr: _col2 - type: double + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -323,9 +269,6 @@ POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, t POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME INPUT))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT) key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL INPUT) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL INPUT) key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -338,54 +281,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - input + Map Operator Tree: TableScan alias: input + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5) - type: string + key expressions: substr(value, 5) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(value, 5) - type: string - tag: -1 - value expressions: - expr: key - type: int + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(KEY._col0) - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: int + aggregations: count(KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: sum(KEY._col0) - expr: sum(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: int + aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -393,46 +321,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: count(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -454,46 +364,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: double - expr: _col2 - type: double + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -643,9 +535,6 @@ POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, t POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME INPUT))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT) key)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL INPUT) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT) key)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI avg (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL INPUT) key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -658,54 +547,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - input + Map Operator Tree: TableScan alias: input + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5) - type: string + key expressions: substr(value, 5) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(value, 5) - type: string - tag: -1 - value expressions: - expr: key - type: int + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(DISTINCT KEY._col0) - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: int + aggregations: sum(DISTINCT KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: sum(DISTINCT KEY._col0) - expr: avg(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: int + aggregations: sum(DISTINCT KEY._col0), avg(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -713,46 +587,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: double - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: count(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: sum(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -774,46 +630,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: double - expr: _col2 - type: struct + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: avg(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: sum(VALUE._col0), avg(VALUE._col1) + keys: KEY._col0 (type: int) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby11.q.out ql/src/test/results/clientpositive/groupby11.q.out index eae210f..69921c2 100644 --- ql/src/test/results/clientpositive/groupby11.q.out +++ ql/src/test/results/clientpositive/groupby11.q.out @@ -22,9 +22,6 @@ INSERT OVERWRITE TABLE dest1 partition(ds='111') INSERT OVERWRITE TABLE dest2 partition(ds='111') SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1) (TOK_PARTSPEC (TOK_PARTVAL ds '111')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value)) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) key))) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) key)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2) (TOK_PARTSPEC (TOK_PARTVAL ds '111')))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) key))) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) key)))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -37,56 +34,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: -1 - value expressions: - expr: value - type: string - expr: substr(value, 5) - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string), substr(value, 5) (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(KEY._col0) - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(KEY._col0) - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col1 - type: string + aggregations: count(KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -94,46 +74,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: count(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -157,46 +119,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: count(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby12.q.out ql/src/test/results/clientpositive/groupby12.q.out index 40d5f58..5a3a4e9 100644 --- ql/src/test/results/clientpositive/groupby12.q.out +++ ql/src/test/results/clientpositive/groupby12.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL src) key))) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,49 +19,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - tag: -1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count(KEY._col0) - expr: count(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(KEY._col0), count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col1) - type: int - expr: _col2 - type: bigint + expressions: UDFToInteger(_col1) (type: int), _col2 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby1_limit.q.out ql/src/test/results/clientpositive/groupby1_limit.q.out index 81a4081..4d09bf2 100644 --- ql/src/test/results/clientpositive/groupby1_limit.q.out +++ ql/src/test/results/clientpositive/groupby1_limit.q.out @@ -9,9 +9,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key LIMIT 5 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key)) (TOK_LIMIT 5))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -21,59 +18,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(substr(value, 5)) - bucketGroup: false - keys: - expr: key - type: string + aggregations: sum(substr(value, 5)) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -81,30 +61,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: double + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) Reduce Operator Tree: Extract + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby1_map.q.out ql/src/test/results/clientpositive/groupby1_map.q.out index 2f10f68..5bc1dda 100644 --- ql/src/test/results/clientpositive/groupby1_map.q.out +++ ql/src/test/results/clientpositive/groupby1_map.q.out @@ -9,9 +9,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -20,58 +17,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(substr(value, 5)) - bucketGroup: false - keys: - expr: key - type: string + aggregations: sum(substr(value, 5)) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby1_map_nomap.q.out ql/src/test/results/clientpositive/groupby1_map_nomap.q.out index 2f10f68..5bc1dda 100644 --- ql/src/test/results/clientpositive/groupby1_map_nomap.q.out +++ ql/src/test/results/clientpositive/groupby1_map_nomap.q.out @@ -9,9 +9,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -20,58 +17,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(substr(value, 5)) - bucketGroup: false - keys: - expr: key - type: string + aggregations: sum(substr(value, 5)) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby1_map_skew.q.out ql/src/test/results/clientpositive/groupby1_map_skew.q.out index c51a925..101f4a4 100644 --- ql/src/test/results/clientpositive/groupby1_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby1_map_skew.q.out @@ -9,9 +9,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -21,51 +18,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(substr(value, 5)) - bucketGroup: false - keys: - expr: key - type: string + aggregations: sum(substr(value, 5)) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -73,41 +54,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby1_noskew.q.out ql/src/test/results/clientpositive/groupby1_noskew.q.out index a3a21f7..f7fcc28 100644 --- ql/src/test/results/clientpositive/groupby1_noskew.q.out +++ ql/src/test/results/clientpositive/groupby1_noskew.q.out @@ -9,9 +9,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -20,49 +17,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: -1 - value expressions: - expr: substr(value, 5) - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: substr(value, 5) (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby2.q.out ql/src/test/results/clientpositive/groupby2.q.out index 88d69d2..68da953 100644 --- ql/src/test/results/clientpositive/groupby2.q.out +++ ql/src/test/results/clientpositive/groupby2.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,51 +19,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(key, 1, 1) - type: string - expr: substr(value, 5) - type: string + key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) sort order: ++ - Map-reduce partition columns: - expr: substr(key, 1, 1) - type: string - tag: -1 + Map-reduce partition columns: substr(key, 1, 1) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby2_limit.q.out ql/src/test/results/clientpositive/groupby2_limit.q.out index 9a4f756..f2d33f6 100644 --- ql/src/test/results/clientpositive/groupby2_limit.q.out +++ ql/src/test/results/clientpositive/groupby2_limit.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key LIMIT 5 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key)) (TOK_LIMIT 5))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,59 +11,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(substr(value, 5)) - bucketGroup: false - keys: - expr: key - type: string + aggregations: sum(substr(value, 5)) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby2_map.q.out ql/src/test/results/clientpositive/groupby2_map.q.out index c4e5a6a..c63dc5e 100644 --- ql/src/test/results/clientpositive/groupby2_map.q.out +++ ql/src/test/results/clientpositive/groupby2_map.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,68 +19,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT substr(value, 5)) - expr: sum(substr(value, 5)) - bucketGroup: false - keys: - expr: substr(key, 1, 1) - type: string - expr: substr(value, 5) - type: string + aggregations: count(DISTINCT substr(value, 5)), sum(substr(value, 5)) + keys: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint - expr: _col3 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out index 3d3e909..d99b794 100644 --- ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,80 +19,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT substr(value, 5)) - expr: sum(substr(value, 5)) - expr: sum(DISTINCT substr(value, 5)) - expr: count(value) - bucketGroup: false - keys: - expr: substr(key, 1, 1) - type: string - expr: substr(value, 5) - type: string + aggregations: count(DISTINCT substr(value, 5)), sum(substr(value, 5)), sum(DISTINCT substr(value, 5)), count(value) + keys: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint - expr: _col3 - type: double - expr: _col4 - type: double - expr: _col5 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(VALUE._col1) - expr: sum(DISTINCT KEY._col1:1._col0) - expr: count(VALUE._col3) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string - expr: UDFToInteger(_col3) - type: int - expr: UDFToInteger(_col4) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -170,9 +127,6 @@ POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -181,80 +135,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT substr(key, 1, 1)) - expr: sum(substr(value, 5)) - expr: sum(DISTINCT substr(value, 5)) - expr: count(value) - bucketGroup: false - keys: - expr: substr(key, 1, 1) - type: string - expr: substr(value, 5) - type: string + aggregations: count(DISTINCT substr(key, 1, 1)), sum(substr(value, 5)), sum(DISTINCT substr(value, 5)), count(value) + keys: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint - expr: _col3 - type: double - expr: _col4 - type: double - expr: _col5 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(VALUE._col1) - expr: sum(DISTINCT KEY._col1:1._col0) - expr: count(VALUE._col3) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string - expr: UDFToInteger(_col3) - type: int - expr: UDFToInteger(_col4) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby2_map_skew.q.out ql/src/test/results/clientpositive/groupby2_map_skew.q.out index 973bf25..bbccd08 100644 --- ql/src/test/results/clientpositive/groupby2_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby2_map_skew.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,68 +19,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT substr(value, 5)) - expr: sum(substr(value, 5)) - bucketGroup: false - keys: - expr: substr(key, 1, 1) - type: string - expr: substr(value, 5) - type: string + aggregations: count(DISTINCT substr(value, 5)), sum(substr(value, 5)) + keys: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint - expr: _col3 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby2_noskew.q.out ql/src/test/results/clientpositive/groupby2_noskew.q.out index 88d69d2..68da953 100644 --- ql/src/test/results/clientpositive/groupby2_noskew.q.out +++ ql/src/test/results/clientpositive/groupby2_noskew.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,51 +19,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(key, 1, 1) - type: string - expr: substr(value, 5) - type: string + key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) sort order: ++ - Map-reduce partition columns: - expr: substr(key, 1, 1) - type: string - tag: -1 + Map-reduce partition columns: substr(key, 1, 1) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out index 0251b8d..5dd2d79 100644 --- ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,60 +19,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(key, 1, 1) - type: string - expr: substr(value, 5) - type: string + key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) sort order: ++ - Map-reduce partition columns: - expr: substr(key, 1, 1) - type: string - tag: -1 - value expressions: - expr: value - type: string + Map-reduce partition columns: substr(key, 1, 1) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(KEY._col1:0._col0) - expr: sum(DISTINCT KEY._col1:1._col0) - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string - expr: UDFToInteger(_col3) - type: int - expr: UDFToInteger(_col4) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby3.q.out ql/src/test/results/clientpositive/groupby3.q.out index cac87e6..a74f2b5 100644 --- ql/src/test/results/clientpositive/groupby3.q.out +++ ql/src/test/results/clientpositive/groupby3.q.out @@ -29,9 +29,6 @@ INSERT OVERWRITE TABLE dest1 SELECT variance(substr(src.value,5)), var_samp(substr(src.value,5)) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI avg (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION std (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION stddev_samp (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION variance (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION var_samp (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -41,42 +38,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5) - type: string + key expressions: substr(value, 5) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(value, 5) - type: string - tag: -1 + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(KEY._col0:0._col0) - expr: avg(KEY._col0:0._col0) - expr: avg(DISTINCT KEY._col0:0._col0) - expr: max(KEY._col0:0._col0) - expr: min(KEY._col0:0._col0) - expr: std(KEY._col0:0._col0) - expr: stddev_samp(KEY._col0:0._col0) - expr: variance(KEY._col0:0._col0) - expr: var_samp(KEY._col0:0._col0) - bucketGroup: false + aggregations: sum(KEY._col0:0._col0), avg(KEY._col0:0._col0), avg(DISTINCT KEY._col0:0._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), std(KEY._col0:0._col0), stddev_samp(KEY._col0:0._col0), variance(KEY._col0:0._col0), var_samp(KEY._col0:0._col0) mode: partial1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -84,70 +66,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: double - expr: _col1 - type: struct - expr: _col2 - type: struct - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: struct - expr: _col6 - type: struct - expr: _col7 - type: struct - expr: _col8 - type: struct + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: avg(VALUE._col1) - expr: avg(VALUE._col2) - expr: max(VALUE._col3) - expr: min(VALUE._col4) - expr: std(VALUE._col5) - expr: stddev_samp(VALUE._col6) - expr: variance(VALUE._col7) - expr: var_samp(VALUE._col8) - bucketGroup: false + aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8) mode: final outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: double - expr: _col2 - type: double - expr: UDFToDouble(_col3) - type: double - expr: UDFToDouble(_col4) - type: double - expr: _col5 - type: double - expr: _col6 - type: double - expr: _col7 - type: double - expr: _col8 - type: double + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby3_map.q.out ql/src/test/results/clientpositive/groupby3_map.q.out index f63eab6..9424071 100644 --- ql/src/test/results/clientpositive/groupby3_map.q.out +++ ql/src/test/results/clientpositive/groupby3_map.q.out @@ -29,9 +29,6 @@ INSERT OVERWRITE TABLE dest1 SELECT variance(substr(src.value,5)), var_samp(substr(src.value,5)) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI avg (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION std (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION stddev_samp (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION variance (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION var_samp (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -40,96 +37,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(substr(value, 5)) - expr: avg(substr(value, 5)) - expr: avg(DISTINCT substr(value, 5)) - expr: max(substr(value, 5)) - expr: min(substr(value, 5)) - expr: std(substr(value, 5)) - expr: stddev_samp(substr(value, 5)) - expr: variance(substr(value, 5)) - expr: var_samp(substr(value, 5)) - bucketGroup: false - keys: - expr: substr(value, 5) - type: string + aggregations: sum(substr(value, 5)), avg(substr(value, 5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), variance(substr(value, 5)), var_samp(substr(value, 5)) + keys: substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col1 - type: double - expr: _col2 - type: struct - expr: _col3 - type: struct - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: struct - expr: _col7 - type: struct - expr: _col8 - type: struct - expr: _col9 - type: struct + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: avg(VALUE._col1) - expr: avg(DISTINCT KEY._col0:0._col0) - expr: max(VALUE._col3) - expr: min(VALUE._col4) - expr: std(VALUE._col5) - expr: stddev_samp(VALUE._col6) - expr: variance(VALUE._col7) - expr: var_samp(VALUE._col8) - bucketGroup: false + aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(DISTINCT KEY._col0:0._col0), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: double - expr: _col2 - type: double - expr: UDFToDouble(_col3) - type: double - expr: UDFToDouble(_col4) - type: double - expr: _col5 - type: double - expr: _col6 - type: double - expr: _col7 - type: double - expr: _col8 - type: double + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out index ea0e616..9bcd7c9 100644 --- ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out @@ -33,9 +33,6 @@ INSERT OVERWRITE TABLE dest1 SELECT sum(DISTINCT substr(src.value, 5)), count(DISTINCT substr(src.value, 5)) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI avg (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION std (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION stddev_samp (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION variance (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION var_samp (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -44,108 +41,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(substr(value, 5)) - expr: avg(substr(value, 5)) - expr: avg(DISTINCT substr(value, 5)) - expr: max(substr(value, 5)) - expr: min(substr(value, 5)) - expr: std(substr(value, 5)) - expr: stddev_samp(substr(value, 5)) - expr: variance(substr(value, 5)) - expr: var_samp(substr(value, 5)) - expr: sum(DISTINCT substr(value, 5)) - expr: count(DISTINCT substr(value, 5)) - bucketGroup: false - keys: - expr: substr(value, 5) - type: string + aggregations: sum(substr(value, 5)), avg(substr(value, 5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), variance(substr(value, 5)), var_samp(substr(value, 5)), sum(DISTINCT substr(value, 5)), count(DISTINCT substr(value, 5)) + keys: substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col1 - type: double - expr: _col2 - type: struct - expr: _col3 - type: struct - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: struct - expr: _col7 - type: struct - expr: _col8 - type: struct - expr: _col9 - type: struct - expr: _col10 - type: double - expr: _col11 - type: bigint + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: double), _col11 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: avg(VALUE._col1) - expr: avg(DISTINCT KEY._col0:0._col0) - expr: max(VALUE._col3) - expr: min(VALUE._col4) - expr: std(VALUE._col5) - expr: stddev_samp(VALUE._col6) - expr: variance(VALUE._col7) - expr: var_samp(VALUE._col8) - expr: sum(DISTINCT KEY._col0:1._col0) - expr: count(DISTINCT KEY._col0:2._col0) - bucketGroup: false + aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(DISTINCT KEY._col0:0._col0), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8), sum(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: double - expr: _col2 - type: double - expr: UDFToDouble(_col3) - type: double - expr: UDFToDouble(_col4) - type: double - expr: _col5 - type: double - expr: _col6 - type: double - expr: _col7 - type: double - expr: _col8 - type: double - expr: _col9 - type: double - expr: UDFToDouble(_col10) - type: double + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), UDFToDouble(_col10) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 248 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby3_map_skew.q.out ql/src/test/results/clientpositive/groupby3_map_skew.q.out index 69175a8..f438f89 100644 --- ql/src/test/results/clientpositive/groupby3_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby3_map_skew.q.out @@ -29,9 +29,6 @@ INSERT OVERWRITE TABLE dest1 SELECT variance(substr(src.value,5)), var_samp(substr(src.value,5)) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI avg (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION std (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION stddev_samp (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION variance (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION var_samp (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -41,78 +38,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(substr(value, 5)) - expr: avg(substr(value, 5)) - expr: avg(DISTINCT substr(value, 5)) - expr: max(substr(value, 5)) - expr: min(substr(value, 5)) - expr: std(substr(value, 5)) - expr: stddev_samp(substr(value, 5)) - expr: variance(substr(value, 5)) - expr: var_samp(substr(value, 5)) - bucketGroup: false - keys: - expr: substr(value, 5) - type: string + aggregations: sum(substr(value, 5)), avg(substr(value, 5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), variance(substr(value, 5)), var_samp(substr(value, 5)) + keys: substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double - expr: _col2 - type: struct - expr: _col3 - type: struct - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: struct - expr: _col7 - type: struct - expr: _col8 - type: struct - expr: _col9 - type: struct + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: avg(VALUE._col1) - expr: avg(DISTINCT KEY._col0:0._col0) - expr: max(VALUE._col3) - expr: min(VALUE._col4) - expr: std(VALUE._col5) - expr: stddev_samp(VALUE._col6) - expr: variance(VALUE._col7) - expr: var_samp(VALUE._col8) - bucketGroup: false + aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(DISTINCT KEY._col0:0._col0), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8) mode: partials outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -120,70 +73,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: double - expr: _col1 - type: struct - expr: _col2 - type: struct - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: struct - expr: _col6 - type: struct - expr: _col7 - type: struct - expr: _col8 - type: struct + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: avg(VALUE._col1) - expr: avg(VALUE._col2) - expr: max(VALUE._col3) - expr: min(VALUE._col4) - expr: std(VALUE._col5) - expr: stddev_samp(VALUE._col6) - expr: variance(VALUE._col7) - expr: var_samp(VALUE._col8) - bucketGroup: false + aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8) mode: final outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: double - expr: _col2 - type: double - expr: UDFToDouble(_col3) - type: double - expr: UDFToDouble(_col4) - type: double - expr: _col5 - type: double - expr: _col6 - type: double - expr: _col7 - type: double - expr: _col8 - type: double + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby3_noskew.q.out ql/src/test/results/clientpositive/groupby3_noskew.q.out index 7df594b..c461da3 100644 --- ql/src/test/results/clientpositive/groupby3_noskew.q.out +++ ql/src/test/results/clientpositive/groupby3_noskew.q.out @@ -29,9 +29,6 @@ INSERT OVERWRITE TABLE dest1 SELECT variance(substr(src.value,5)), var_samp(substr(src.value,5)) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI avg (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION std (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION stddev_samp (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION variance (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION var_samp (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -40,60 +37,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5) - type: string + key expressions: substr(value, 5) (type: string) sort order: + - tag: -1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(KEY._col0:0._col0) - expr: avg(KEY._col0:0._col0) - expr: avg(DISTINCT KEY._col0:0._col0) - expr: max(KEY._col0:0._col0) - expr: min(KEY._col0:0._col0) - expr: std(KEY._col0:0._col0) - expr: stddev_samp(KEY._col0:0._col0) - expr: variance(KEY._col0:0._col0) - expr: var_samp(KEY._col0:0._col0) - bucketGroup: false + aggregations: sum(KEY._col0:0._col0), avg(KEY._col0:0._col0), avg(DISTINCT KEY._col0:0._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), std(KEY._col0:0._col0), stddev_samp(KEY._col0:0._col0), variance(KEY._col0:0._col0), var_samp(KEY._col0:0._col0) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: double - expr: _col2 - type: double - expr: UDFToDouble(_col3) - type: double - expr: UDFToDouble(_col4) - type: double - expr: _col5 - type: double - expr: _col6 - type: double - expr: _col7 - type: double - expr: _col8 - type: double + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out index 18e2ef8..6ba3342 100644 --- ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out @@ -33,9 +33,6 @@ INSERT OVERWRITE TABLE dest1 SELECT sum(DISTINCT substr(src.value, 5)), count(DISTINCT substr(src.value, 5)) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI avg (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION std (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION stddev_samp (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION variance (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION var_samp (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -44,66 +41,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5) - type: string + key expressions: substr(value, 5) (type: string) sort order: + - tag: -1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(KEY._col0:0._col0) - expr: avg(KEY._col0:0._col0) - expr: avg(DISTINCT KEY._col0:0._col0) - expr: max(KEY._col0:0._col0) - expr: min(KEY._col0:0._col0) - expr: std(KEY._col0:0._col0) - expr: stddev_samp(KEY._col0:0._col0) - expr: variance(KEY._col0:0._col0) - expr: var_samp(KEY._col0:0._col0) - expr: sum(DISTINCT KEY._col0:1._col0) - expr: count(DISTINCT KEY._col0:2._col0) - bucketGroup: false + aggregations: sum(KEY._col0:0._col0), avg(KEY._col0:0._col0), avg(DISTINCT KEY._col0:0._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), std(KEY._col0:0._col0), stddev_samp(KEY._col0:0._col0), variance(KEY._col0:0._col0), var_samp(KEY._col0:0._col0), sum(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: double - expr: _col2 - type: double - expr: UDFToDouble(_col3) - type: double - expr: UDFToDouble(_col4) - type: double - expr: _col5 - type: double - expr: _col6 - type: double - expr: _col7 - type: double - expr: _col8 - type: double - expr: _col9 - type: double - expr: UDFToDouble(_col10) - type: double + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), UDFToDouble(_col10) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby4.q.out ql/src/test/results/clientpositive/groupby4.q.out index cd9271c..7e5e621 100644 --- ql/src/test/results/clientpositive/groupby4.q.out +++ ql/src/test/results/clientpositive/groupby4.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -23,35 +20,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(key, 1, 1) - type: string + key expressions: substr(key, 1, 1) (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -59,34 +48,26 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby4_map.q.out ql/src/test/results/clientpositive/groupby4_map.q.out index 9151c5e..ac68f53 100644 --- ql/src/test/results/clientpositive/groupby4_map.q.out +++ ql/src/test/results/clientpositive/groupby4_map.q.out @@ -9,9 +9,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -20,38 +17,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int + expressions: UDFToInteger(_col0) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby4_map_skew.q.out ql/src/test/results/clientpositive/groupby4_map_skew.q.out index 8a7304b..e4183e8 100644 --- ql/src/test/results/clientpositive/groupby4_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby4_map_skew.q.out @@ -9,9 +9,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -20,38 +17,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: final outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int + expressions: UDFToInteger(_col0) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby4_noskew.q.out ql/src/test/results/clientpositive/groupby4_noskew.q.out index a17c764..559353a 100644 --- ql/src/test/results/clientpositive/groupby4_noskew.q.out +++ ql/src/test/results/clientpositive/groupby4_noskew.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,40 +19,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(key, 1, 1) - type: string + key expressions: substr(key, 1, 1) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(key, 1, 1) - type: string - tag: -1 + Map-reduce partition columns: substr(key, 1, 1) (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby5.q.out ql/src/test/results/clientpositive/groupby5.q.out index 6a6a2cb..5e91c1e 100644 --- ql/src/test/results/clientpositive/groupby5.q.out +++ ql/src/test/results/clientpositive/groupby5.q.out @@ -15,9 +15,6 @@ SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -27,42 +24,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: substr(value, 5) - type: string + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: substr(value, 5) (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -70,41 +54,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby5_map.q.out ql/src/test/results/clientpositive/groupby5_map.q.out index 96b50e9..a021874 100644 --- ql/src/test/results/clientpositive/groupby5_map.q.out +++ ql/src/test/results/clientpositive/groupby5_map.q.out @@ -9,9 +9,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum (. (TOK_TABLE_OR_COL src) key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -20,42 +17,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(key) - bucketGroup: false + aggregations: sum(key) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: double + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false + aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int + expressions: UDFToInteger(_col0) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby5_map_skew.q.out ql/src/test/results/clientpositive/groupby5_map_skew.q.out index 48a90ff..9beca3f 100644 --- ql/src/test/results/clientpositive/groupby5_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby5_map_skew.q.out @@ -9,9 +9,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum (. (TOK_TABLE_OR_COL src) key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -20,42 +17,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(key) - bucketGroup: false + aggregations: sum(key) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: double + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false + aggregations: sum(VALUE._col0) mode: final outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int + expressions: UDFToInteger(_col0) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby5_noskew.q.out ql/src/test/results/clientpositive/groupby5_noskew.q.out index 0feebee..fb2197d 100644 --- ql/src/test/results/clientpositive/groupby5_noskew.q.out +++ ql/src/test/results/clientpositive/groupby5_noskew.q.out @@ -15,9 +15,6 @@ SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -26,49 +23,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: -1 - value expressions: - expr: substr(value, 5) - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: substr(value, 5) (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby6.q.out ql/src/test/results/clientpositive/groupby6.q.out index ce3cea8..16d58e9 100644 --- ql/src/test/results/clientpositive/groupby6.q.out +++ ql/src/test/results/clientpositive/groupby6.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECTDI (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -23,35 +20,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5, 1) - type: string + key expressions: substr(value, 5, 1) (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -59,34 +48,26 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby6_map.q.out ql/src/test/results/clientpositive/groupby6_map.q.out index 6a28f9f..4623771 100644 --- ql/src/test/results/clientpositive/groupby6_map.q.out +++ ql/src/test/results/clientpositive/groupby6_map.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECTDI (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,47 +19,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: substr(value, 5, 1) - type: string + keys: substr(value, 5, 1) (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby6_map_skew.q.out ql/src/test/results/clientpositive/groupby6_map_skew.q.out index d9f440a..b67c751 100644 --- ql/src/test/results/clientpositive/groupby6_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby6_map_skew.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECTDI (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -23,42 +20,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: substr(value, 5, 1) - type: string + keys: substr(value, 5, 1) (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -66,34 +53,26 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby6_noskew.q.out ql/src/test/results/clientpositive/groupby6_noskew.q.out index a7a6f55..6b1501d 100644 --- ql/src/test/results/clientpositive/groupby6_noskew.q.out +++ ql/src/test/results/clientpositive/groupby6_noskew.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECTDI (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,40 +19,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5, 1) - type: string + key expressions: substr(value, 5, 1) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(value, 5, 1) - type: string - tag: -1 + Map-reduce partition columns: substr(value, 5, 1) (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby7_map.q.out ql/src/test/results/clientpositive/groupby7_map.q.out index 5dcb657..488e3c6 100644 --- ql/src/test/results/clientpositive/groupby7_map.q.out +++ ql/src/test/results/clientpositive/groupby7_map.q.out @@ -18,9 +18,6 @@ FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -32,81 +29,56 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(substr(value, 5)) - bucketGroup: false - keys: - expr: key - type: string + aggregations: sum(substr(value, 5)) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(substr(value, 5)) - bucketGroup: false - keys: - expr: key - type: string + aggregations: sum(substr(value, 5)) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -128,41 +100,28 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out index 03ad6ba..ad3bc4c 100644 --- ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out @@ -18,9 +18,6 @@ FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -31,74 +28,54 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: -1 - value expressions: - expr: substr(value, 5) - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: substr(value, 5) (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby7_map_skew.q.out ql/src/test/results/clientpositive/groupby7_map_skew.q.out index 8dc2133..b23121b 100644 --- ql/src/test/results/clientpositive/groupby7_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby7_map_skew.q.out @@ -18,9 +18,6 @@ FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -34,74 +31,51 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(substr(value, 5)) - bucketGroup: false - keys: - expr: key - type: string + aggregations: sum(substr(value, 5)) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(substr(value, 5)) - bucketGroup: false - keys: - expr: key - type: string + aggregations: sum(substr(value, 5)) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -109,41 +83,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -165,34 +126,23 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -200,41 +150,28 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby7_noskew.q.out ql/src/test/results/clientpositive/groupby7_noskew.q.out index 68ffc8e..eb13bb8 100644 --- ql/src/test/results/clientpositive/groupby7_noskew.q.out +++ ql/src/test/results/clientpositive/groupby7_noskew.q.out @@ -18,9 +18,6 @@ FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -32,63 +29,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: -1 - value expressions: - expr: substr(value, 5) - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: substr(value, 5) (type: string) Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -110,41 +88,28 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: -1 - value expressions: - expr: substr(value, 5) - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: substr(value, 5) (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out index f62440c..5fcc176 100644 --- ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out @@ -18,9 +18,6 @@ FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key limit 10 INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key)) (TOK_LIMIT 10)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -33,75 +30,57 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: -1 - value expressions: - expr: substr(value, 5) - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: substr(value, 5) (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -109,30 +88,25 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: double + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -154,30 +128,25 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: double + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: double + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 2 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby8.q.out ql/src/test/results/clientpositive/groupby8.q.out index 14c06b8..960a2bd 100644 --- ql/src/test/results/clientpositive/groupby8.q.out +++ ql/src/test/results/clientpositive/groupby8.q.out @@ -18,9 +18,6 @@ FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -33,52 +30,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5) - type: string + key expressions: substr(value, 5) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(value, 5) - type: string - tag: -1 - value expressions: - expr: key - type: string + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -86,41 +70,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -142,41 +113,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -870,9 +828,6 @@ POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:str POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -885,52 +840,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5) - type: string + key expressions: substr(value, 5) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(value, 5) - type: string - tag: -1 - value expressions: - expr: key - type: string + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -938,41 +880,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -994,41 +923,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby8_map.q.out ql/src/test/results/clientpositive/groupby8_map.q.out index d627ca2..fc429b2 100644 --- ql/src/test/results/clientpositive/groupby8_map.q.out +++ ql/src/test/results/clientpositive/groupby8_map.q.out @@ -18,9 +18,6 @@ FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -33,52 +30,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5) - type: string + key expressions: substr(value, 5) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(value, 5) - type: string - tag: -1 - value expressions: - expr: key - type: string + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -86,41 +70,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -142,41 +113,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby8_map_skew.q.out ql/src/test/results/clientpositive/groupby8_map_skew.q.out index d627ca2..fc429b2 100644 --- ql/src/test/results/clientpositive/groupby8_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby8_map_skew.q.out @@ -18,9 +18,6 @@ FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -33,52 +30,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5) - type: string + key expressions: substr(value, 5) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(value, 5) - type: string - tag: -1 - value expressions: - expr: key - type: string + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -86,41 +70,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -142,41 +113,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby8_noskew.q.out ql/src/test/results/clientpositive/groupby8_noskew.q.out index d627ca2..fc429b2 100644 --- ql/src/test/results/clientpositive/groupby8_noskew.q.out +++ ql/src/test/results/clientpositive/groupby8_noskew.q.out @@ -18,9 +18,6 @@ FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -33,52 +30,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5) - type: string + key expressions: substr(value, 5) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(value, 5) - type: string - tag: -1 - value expressions: - expr: key - type: string + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -86,41 +70,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -142,41 +113,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby9.q.out ql/src/test/results/clientpositive/groupby9.q.out index 99f1ec0..e5caf02 100644 --- ql/src/test/results/clientpositive/groupby9.q.out +++ ql/src/test/results/clientpositive/groupby9.q.out @@ -18,9 +18,6 @@ FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key) (. (TOK_TABLE_OR_COL SRC) value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -33,56 +30,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5) - type: string + key expressions: substr(value, 5) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(value, 5) - type: string - tag: -1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string - expr: VALUE._col1 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string), VALUE._col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -90,41 +70,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -146,49 +113,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -886,9 +832,6 @@ POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) value) (. (TOK_TABLE_OR_COL SRC) key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -901,56 +844,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5) - type: string + key expressions: substr(value, 5) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(value, 5) - type: string - tag: -1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col1 - type: string - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col1 (type: string), VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -958,41 +884,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1014,49 +927,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col1) - type: int - expr: _col0 - type: string - expr: _col2 - type: bigint + expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1774,9 +1666,6 @@ POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key) (. (TOK_TABLE_OR_COL SRC) value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -1789,56 +1678,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5) - type: string + key expressions: substr(value, 5) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(value, 5) - type: string - tag: -1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string - expr: VALUE._col1 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string), VALUE._col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1846,41 +1718,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1902,49 +1761,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2682,9 +2520,6 @@ POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTION COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key) (. (TOK_TABLE_OR_COL SRC) value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -2696,83 +2531,56 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(substr(value, 5)) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(substr(value, 5)) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(substr(value, 5)) - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + aggregations: count(substr(value, 5)) + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2794,49 +2602,28 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3594,9 +3381,6 @@ POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type: POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) value) (. (TOK_TABLE_OR_COL SRC) key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -3609,56 +3393,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(value, 5) - type: string + key expressions: substr(value, 5) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(value, 5) - type: string - tag: -1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col1 - type: string - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col1 (type: string), VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3666,41 +3433,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3722,49 +3476,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col1) - type: int - expr: _col0 - type: string - expr: _col2 - type: bigint + expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_complex_types.q.out ql/src/test/results/clientpositive/groupby_complex_types.q.out index 4548fdd..16f742e 100644 --- ql/src/test/results/clientpositive/groupby_complex_types.q.out +++ ql/src/test/results/clientpositive/groupby_complex_types.q.out @@ -25,9 +25,6 @@ INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC. INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) INSERT OVERWRITE TABLE DEST3 SELECT STRUCT(SRC.key, SRC.value), COUNT(1) GROUP BY STRUCT(SRC.key, SRC.value) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION ARRAY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_SELEXPR (TOK_FUNCTION COUNT 1))) (TOK_GROUPBY (TOK_FUNCTION ARRAY (. (TOK_TABLE_OR_COL SRC) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION MAP (. (TOK_TABLE_OR_COL SRC) key) (. (TOK_TABLE_OR_COL SRC) value))) (TOK_SELEXPR (TOK_FUNCTION COUNT 1))) (TOK_GROUPBY (TOK_FUNCTION MAP (. (TOK_TABLE_OR_COL SRC) key) (. (TOK_TABLE_OR_COL SRC) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST3))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION STRUCT (. (TOK_TABLE_OR_COL SRC) key) (. (TOK_TABLE_OR_COL SRC) value))) (TOK_SELEXPR (TOK_FUNCTION COUNT 1))) (TOK_GROUPBY (TOK_FUNCTION STRUCT (. (TOK_TABLE_OR_COL SRC) key) (. (TOK_TABLE_OR_COL SRC) value))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 @@ -42,102 +39,72 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: array(key) - type: array + aggregations: count(1) + keys: array(key) (type: array) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: array + key expressions: _col0 (type: array) sort order: + - Map-reduce partition columns: - expr: _col0 - type: array - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: array) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: map(key:value) - type: map + aggregations: count(1) + keys: map(key:value) (type: map) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: struct(key,value) - type: struct + aggregations: count(1) + keys: struct(key,value) (type: struct) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: array + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: array) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: array - expr: _col1 - type: bigint + expressions: _col0 (type: array), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -159,41 +126,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: map + key expressions: _col0 (type: map) sort order: + - Map-reduce partition columns: - expr: _col0 - type: map - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: map) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: map + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: map) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: map - expr: _col1 - type: bigint + expressions: _col0 (type: map), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -215,41 +169,28 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: struct + key expressions: _col0 (type: struct) sort order: + - Map-reduce partition columns: - expr: _col0 - type: struct - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: struct + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: struct - expr: _col1 - type: bigint + expressions: _col0 (type: struct), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out index a0f9156..fd98abd 100644 --- ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out @@ -18,9 +18,6 @@ FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10 INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION ARRAY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_SELEXPR (TOK_FUNCTION COUNT 1))) (TOK_GROUPBY (TOK_FUNCTION ARRAY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_LIMIT 10)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION MAP (. (TOK_TABLE_OR_COL SRC) key) (. (TOK_TABLE_OR_COL SRC) value))) (TOK_SELEXPR (TOK_FUNCTION COUNT 1))) (TOK_GROUPBY (TOK_FUNCTION MAP (. (TOK_TABLE_OR_COL SRC) key) (. (TOK_TABLE_OR_COL SRC) value))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -34,80 +31,58 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: array(key) - type: array + aggregations: count(1) + keys: array(key) (type: array) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: array + key expressions: _col0 (type: array) sort order: + - Map-reduce partition columns: - expr: _col0 - type: array - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: array) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: map(key:value) - type: map + aggregations: count(1) + keys: map(key:value) (type: map) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: array + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: array) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: array - expr: _col1 - type: bigint + expressions: _col0 (type: array), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -115,23 +90,21 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: array - expr: _col1 - type: bigint + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: array), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -153,42 +126,30 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: map + key expressions: _col0 (type: map) sort order: + - Map-reduce partition columns: - expr: _col0 - type: map - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: map) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: map + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: map) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: map - expr: _col1 - type: bigint + expressions: _col0 (type: map), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -196,23 +157,21 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: map - expr: _col1 - type: bigint + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: map), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_cube1.q.out ql/src/test/results/clientpositive/groupby_cube1.q.out index d4e2388..4a92086 100644 --- ql/src/test/results/clientpositive/groupby_cube1.q.out +++ ql/src/test/results/clientpositive/groupby_cube1.q.out @@ -15,9 +15,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -25,76 +22,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: val - type: string - expr: '0' - type: string + aggregations: count(1) + keys: key (type: string), val (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -138,9 +99,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key, count(distinct val) FROM T1 GROUP BY key with cube POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL val)))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -148,70 +106,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT val) - bucketGroup: false - keys: - expr: key - type: string - expr: '0' - type: string - expr: val - type: string + aggregations: count(DISTINCT val) + keys: key (type: string), '0' (type: string), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col2:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -243,9 +171,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -254,63 +179,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: val - type: string - expr: '0' - type: string + aggregations: count(1) + keys: key (type: string), val (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -318,53 +215,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -408,9 +280,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key, count(distinct val) FROM T1 GROUP BY key with cube POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL val)))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -418,68 +287,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT val) - bucketGroup: false - keys: - expr: key - type: string - expr: '0' - type: string - expr: val - type: string + aggregations: count(DISTINCT val) + keys: key (type: string), '0' (type: string), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col2:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -525,9 +366,6 @@ FROM T1 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME T2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME T3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION sum 1))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -541,90 +379,51 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: val - type: string - expr: '0' - type: string + aggregations: count(1) + keys: key (type: string), val (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: sum(1) - bucketGroup: false - keys: - expr: key - type: string - expr: val - type: string - expr: '0' - type: string + aggregations: sum(1) + keys: key (type: string), val (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -632,53 +431,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: UDFToInteger(_col3) - type: int + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -700,42 +474,23 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -743,53 +498,28 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: UDFToInteger(_col3) - type: int + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out index 5cfbdeb..c28e31e 100644 --- ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out +++ ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out @@ -31,9 +31,6 @@ POSTHOOK: Lineage: t1.int1 EXPRESSION [(src)src.FieldSchema(name:key, type:strin POSTHOOK: Lineage: t1.int2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t1.str1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: t1.str2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL int1))))) Q1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL Q1) int1)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (. (TOK_TABLE_OR_COL Q1) int1)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL Q1) int1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -42,55 +39,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - q1:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: int1 - type: int - expr: int2 - type: int - expr: str1 - type: string - expr: str2 - type: string + expressions: int1 (type: int), int2 (type: int), str1 (type: string), str2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col3 - type: string + Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(DISTINCT _col0) - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: sum(DISTINCT _col0) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -98,41 +74,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 60 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 60 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 60 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -150,9 +113,6 @@ POSTHOOK: Lineage: t1.int1 EXPRESSION [(src)src.FieldSchema(name:key, type:strin POSTHOOK: Lineage: t1.int2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t1.str1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: t1.str2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL int1)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL int1)))) (TOK_GROUPBY (TOK_TABLE_OR_COL int1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -160,56 +120,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: int1 - type: int + expressions: int1 (type: int) outputColumnNames: int1 + Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(DISTINCT int1) - bucketGroup: false - keys: - expr: int1 - type: int + aggregations: sum(DISTINCT int1) + keys: int1 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 60 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 60 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 60 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out index 6646722..745a40c 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out @@ -17,9 +17,6 @@ POSTHOOK: query: -- Since 4 grouping sets would be generated for the query below EXPLAIN SELECT a, b, count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -28,61 +25,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: a - type: string - expr: b - type: string + expressions: a (type: string), b (type: string) outputColumnNames: a, b + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: a - type: string - expr: b - type: string + aggregations: count() + keys: a (type: string), b (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: '0' - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -90,55 +61,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -177,9 +121,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT a, b, sum(c) from T1 group by a, b with cube POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL c)))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -188,63 +129,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: a - type: string - expr: b - type: string - expr: c - type: string + expressions: a (type: string), b (type: string), c (type: string) outputColumnNames: a, b, c + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: sum(c) - bucketGroup: false - keys: - expr: a - type: string - expr: b - type: string + aggregations: sum(c) + keys: a (type: string), b (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: double + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: '0' - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -252,55 +165,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col3 - type: double + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: double + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -362,9 +248,6 @@ POSTHOOK: Lineage: t2.a SIMPLE [(t1)t1.FieldSchema(name:a, type:string, comment: POSTHOOK: Lineage: t2.b SIMPLE [(t1)t1.FieldSchema(name:b, type:string, comment:null), ] POSTHOOK: Lineage: t2.c EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ] POSTHOOK: Lineage: t2.d EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTION sum (+ (TOK_TABLE_OR_COL c) (TOK_TABLE_OR_COL d))))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -373,65 +256,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t2 + Map Operator Tree: TableScan alias: t2 + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: a - type: string - expr: b - type: string - expr: c - type: int - expr: d - type: int + expressions: a (type: string), b (type: string), c (type: int), d (type: int) outputColumnNames: a, b, c, d + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum((c + d)) - bucketGroup: false - keys: - expr: a - type: string - expr: b - type: string + aggregations: sum((c + d)) + keys: a (type: string), b (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: '0' - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -439,55 +292,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out index 52ff17b..310a202 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out @@ -35,9 +35,6 @@ POSTHOOK: query: -- The query below will execute in a single MR job, since 4 row EXPLAIN SELECT a, b, avg(c), count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -45,84 +42,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: a - type: string - expr: b - type: string - expr: c - type: string + expressions: a (type: string), b (type: string), c (type: string) outputColumnNames: a, b, c + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: avg(c) - expr: count() - bucketGroup: false - keys: - expr: a - type: string - expr: b - type: string - expr: '0' - type: string + aggregations: avg(c), count() + keys: a (type: string), b (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col3 - type: struct - expr: _col4 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: avg(VALUE._col0) - expr: count(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: double - expr: _col4 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -166,9 +119,6 @@ POSTHOOK: query: -- The query below will execute in 2 MR jobs, since hive.new.jo EXPLAIN SELECT a, b, avg(c), count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -177,67 +127,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: a - type: string - expr: b - type: string - expr: c - type: string + expressions: a (type: string), b (type: string), c (type: string) outputColumnNames: a, b, c + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: avg(c) - expr: count() - bucketGroup: false - keys: - expr: a - type: string - expr: b - type: string + aggregations: avg(c), count() + keys: a (type: string), b (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: struct - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: avg(VALUE._col0) - expr: count(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: '0' - type: string + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -245,60 +163,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col3 - type: struct - expr: _col4 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: avg(VALUE._col0) - expr: count(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: double - expr: _col4 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out index 5810364..f5d3633 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out @@ -29,9 +29,6 @@ join (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 on subq1.a = subq2.a order by subq1.a, subq1.b, subq2.a, subq2.b POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (TOK_TABLE_OR_COL a) 3)) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (TOK_TABLE_OR_COL a) 3)) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) a) (. (TOK_TABLE_OR_COL subq2) a)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq1) a)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq1) b)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq2) a)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq2) b))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -42,80 +39,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (a < 3) - type: boolean + predicate: (a < 3) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: a - type: string - expr: b - type: string + expressions: a (type: string), b (type: string) outputColumnNames: a, b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: a - type: string - expr: b - type: string - expr: '0' - type: string + aggregations: count() + keys: a (type: string), b (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -123,43 +82,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -167,26 +104,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -194,39 +119,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: string - expr: _col4 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: bigint + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), _col5 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -234,80 +139,42 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - subq2:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (a < 3) - type: boolean + predicate: (a < 3) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: a - type: string - expr: b - type: string + expressions: a (type: string), b (type: string) outputColumnNames: a, b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: a - type: string - expr: b - type: string - expr: '0' - type: string + aggregations: count() + keys: a (type: string), b (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -364,9 +231,6 @@ join (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 on subq1.a = subq2.a order by subq1.a, subq1.b, subq2.a, subq2.b POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (TOK_TABLE_OR_COL a) 3)) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (TOK_TABLE_OR_COL a) 3)) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) a) (. (TOK_TABLE_OR_COL subq2) a)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq1) a)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq1) b)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq2) a)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq2) b))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -379,65 +243,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (a < 3) - type: boolean + predicate: (a < 3) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: a - type: string - expr: b - type: string + expressions: a (type: string), b (type: string) outputColumnNames: a, b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: a - type: string - expr: b - type: string + aggregations: count() + keys: a (type: string), b (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: '0' - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -445,55 +282,27 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -501,43 +310,21 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -545,26 +332,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -572,39 +347,19 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: string - expr: _col4 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: bigint + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), _col5 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -612,65 +367,38 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - subq2:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (a < 3) - type: boolean + predicate: (a < 3) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: a - type: string - expr: b - type: string + expressions: a (type: string), b (type: string) outputColumnNames: a, b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: a - type: string - expr: b - type: string + aggregations: count() + keys: a (type: string), b (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: '0' - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -678,55 +406,27 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out index 5c24e9b..dce3077 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out @@ -23,9 +23,6 @@ EXPLAIN SELECT a, b, count(*) FROM (SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -34,79 +31,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: a - type: string - expr: b - type: string + expressions: a (type: string), b (type: string) outputColumnNames: a, b + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: a - type: string - expr: b - type: string + aggregations: count(1) + keys: a (type: string), b (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string - expr: '0' - type: string + aggregations: count() + keys: _col0 (type: string), _col1 (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -114,55 +77,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -207,9 +143,6 @@ EXPLAIN SELECT a, b, count(*) FROM (SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -219,77 +152,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: a - type: string - expr: b - type: string + expressions: a (type: string), b (type: string) outputColumnNames: a, b + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: a - type: string - expr: b - type: string + aggregations: count(1) + keys: a (type: string), b (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -297,42 +198,23 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: '0' - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -340,55 +222,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_map_ppr.q.out ql/src/test/results/clientpositive/groupby_map_ppr.q.out index 09bef1d..89c7299 100644 --- ql/src/test/results/clientpositive/groupby_map_ppr.q.out +++ ql/src/test/results/clientpositive/groupby_map_ppr.q.out @@ -18,7 +18,75 @@ WHERE src.ds = '2008-04-08' GROUP BY substr(src.key,1,1) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) ds) '2008-04-08')) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest1 + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + key + 1 + 1 + TOK_SELEXPR + TOK_FUNCTIONDI + count + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + value + 5 + TOK_SELEXPR + TOK_FUNCTION + concat + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + key + 1 + 1 + TOK_FUNCTION + sum + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + value + 5 + TOK_WHERE + = + . + TOK_TABLE_OR_COL + src + ds + '2008-04-08' + TOK_GROUPBY + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + key + 1 + 1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -28,54 +96,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT substr(value, 5)) - expr: sum(substr(value, 5)) - bucketGroup: false - keys: - expr: substr(key, 1, 1) - type: string - expr: substr(value, 5) - type: string + aggregations: count(DISTINCT substr(value, 5)), sum(substr(value, 5)) + keys: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col2 - type: bigint - expr: _col3 - type: double + value expressions: _col2 (type: bigint), _col3 (type: double) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -169,35 +211,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out index e4d15de..12f1fcf 100644 --- ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out @@ -18,7 +18,92 @@ WHERE src.ds = '2008-04-08' GROUP BY substr(src.key,1,1) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) ds) '2008-04-08')) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest1 + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + key + 1 + 1 + TOK_SELEXPR + TOK_FUNCTIONDI + count + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + value + 5 + TOK_SELEXPR + TOK_FUNCTION + concat + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + key + 1 + 1 + TOK_FUNCTION + sum + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + value + 5 + TOK_SELEXPR + TOK_FUNCTIONDI + sum + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + value + 5 + TOK_SELEXPR + TOK_FUNCTIONDI + count + . + TOK_TABLE_OR_COL + src + value + TOK_WHERE + = + . + TOK_TABLE_OR_COL + src + ds + '2008-04-08' + TOK_GROUPBY + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + key + 1 + 1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -28,64 +113,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT substr(value, 5)) - expr: sum(substr(value, 5)) - expr: sum(DISTINCT substr(value, 5)) - expr: count(DISTINCT value) - bucketGroup: false - keys: - expr: substr(key, 1, 1) - type: string - expr: substr(value, 5) - type: string - expr: value - type: string + aggregations: count(DISTINCT substr(value, 5)), sum(substr(value, 5)), sum(DISTINCT substr(value, 5)), count(DISTINCT value) + keys: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col3 - type: bigint - expr: _col4 - type: double - expr: _col5 - type: double - expr: _col6 - type: bigint + value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -179,41 +228,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(VALUE._col1) - expr: sum(DISTINCT KEY._col1:1._col0) - expr: count(DISTINCT KEY._col1:2._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(DISTINCT KEY._col1:2._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string - expr: UDFToInteger(_col3) - type: int - expr: UDFToInteger(_col4) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out index d30385c..f9406e6 100644 --- ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out +++ ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out @@ -18,9 +18,6 @@ from src insert overwrite table dest1 select key, count(distinct value) group by key insert overwrite table dest2 select key+key, count(distinct value) group by key+key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -33,54 +30,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: value (type: string) sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: -1 - value expressions: - expr: key - type: string - expr: (key + key) - type: double + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), (key + key) (type: double) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col1 - type: double + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col1 (type: double) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -88,41 +70,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -144,41 +113,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - Map-reduce partition columns: - expr: _col0 - type: double - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: double + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: double) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -268,9 +224,6 @@ POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:s POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -282,87 +235,56 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT value) - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + aggregations: count(DISTINCT value) + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT value) - bucketGroup: false - keys: - expr: (key + key) - type: double - expr: value - type: string + aggregations: count(DISTINCT value) + keys: (key + key) (type: double), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -384,43 +306,28 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: double - expr: _col1 - type: string + key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: double - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: double + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out index 00019fa..fe00553 100644 --- ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out @@ -35,9 +35,6 @@ INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (>= (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g3))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (< (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g4))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 @@ -50,135 +47,78 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(key, 1, 1) - type: string - expr: substr(value, 5) - type: string + key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) sort order: ++ - Map-reduce partition columns: - expr: substr(key, 1, 1) - type: string - tag: -1 - value expressions: - expr: value - type: string + Map-reduce partition columns: substr(key, 1, 1) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 >= 5) - type: boolean + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(KEY._col1:0._col0) - expr: sum(DISTINCT KEY._col1:1._col0) - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string - expr: UDFToInteger(_col3) - type: int - expr: UDFToInteger(_col4) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 Filter Operator - predicate: - expr: (KEY._col0 < 5) - type: boolean + predicate: (KEY._col0 < 5) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(KEY._col1:0._col0) - expr: sum(DISTINCT KEY._col1:1._col0) - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string - expr: UDFToInteger(_col3) - type: int - expr: UDFToInteger(_col4) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(KEY._col1:0._col0) - expr: sum(DISTINCT KEY._col1:1._col0) - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string - expr: UDFToInteger(_col3) - type: int - expr: UDFToInteger(_col4) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -377,9 +317,6 @@ POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type: POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (>= (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g3))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (< (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g4))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_h2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 2 1)) (TOK_LIMIT 10)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_h3))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (>= (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 2 1)))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-0 depends on stages: Stage-5 @@ -398,149 +335,88 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(key, 1, 1) - type: string - expr: substr(value, 5) - type: string + key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) sort order: ++ - Map-reduce partition columns: - expr: substr(key, 1, 1) - type: string - tag: -1 - value expressions: - expr: value - type: string + Map-reduce partition columns: substr(key, 1, 1) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 >= 5) - type: boolean + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(KEY._col1:0._col0) - expr: sum(DISTINCT KEY._col1:1._col0) - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string - expr: UDFToInteger(_col3) - type: int - expr: UDFToInteger(_col4) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 Filter Operator - predicate: - expr: (KEY._col0 < 5) - type: boolean + predicate: (KEY._col0 < 5) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(KEY._col1:0._col0) - expr: sum(DISTINCT KEY._col1:1._col0) - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string - expr: UDFToInteger(_col3) - type: int - expr: UDFToInteger(_col4) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(KEY._col1:0._col0) - expr: sum(DISTINCT KEY._col1:1._col0) - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string - expr: UDFToInteger(_col3) - type: int - expr: UDFToInteger(_col4) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -588,96 +464,52 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: substr(key, 1, 1) - type: string - expr: substr(key, 2, 1) - type: string - expr: substr(value, 5) - type: string + key expressions: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string), substr(value, 5) (type: string) sort order: +++ - Map-reduce partition columns: - expr: substr(key, 1, 1) - type: string - expr: substr(key, 2, 1) - type: string - tag: -1 - value expressions: - expr: value - type: string + Map-reduce partition columns: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col2:0._col0) - expr: sum(KEY._col2:0._col0) - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: bigint - expr: concat(_col0, _col3) - type: string - expr: _col3 - type: double - expr: _col4 - type: bigint + expressions: _col0 (type: string), _col2 (type: bigint), concat(_col0, _col3) (type: string), _col3 (type: double), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator - predicate: - expr: (KEY._col0 >= 5) - type: boolean + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col2:0._col0) - expr: sum(KEY._col2:0._col0) - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col2) - type: int - expr: concat(_col0, _col3) - type: string - expr: UDFToInteger(_col3) - type: int - expr: UDFToInteger(_col4) - type: int + expressions: _col0 (type: string), UDFToInteger(_col2) (type: int), concat(_col0, _col3) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 5 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -686,42 +518,25 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: double - expr: _col4 - type: bigint + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: double), _col4 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: _col2 - type: string - expr: UDFToInteger(_col3) - type: int - expr: UDFToInteger(_col4) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 4 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out index 0241b96..c3680ad 100644 --- ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out +++ ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out @@ -18,9 +18,6 @@ FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) key)))) (TOK_WHERE (>= (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g3))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) key))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (< (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -31,91 +28,63 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((substr(key, 1, 1) >= 5) or (substr(key, 1, 1) < 5)) - type: boolean + predicate: ((substr(key, 1, 1) >= 5) or (substr(key, 1, 1) < 5)) (type: boolean) + Statistics: Num rows: 18 Data size: 3607 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 18 Data size: 3607 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(key, 1, 1) - type: string - expr: key - type: string + key expressions: substr(key, 1, 1) (type: string), key (type: string) sort order: ++ - Map-reduce partition columns: - expr: substr(key, 1, 1) - type: string - tag: -1 - value expressions: - expr: value - type: string + Map-reduce partition columns: substr(key, 1, 1) (type: string) + Statistics: Num rows: 18 Data size: 3607 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 18 Data size: 3607 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 >= 5) - type: boolean + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 Filter Operator - predicate: - expr: (KEY._col0 < 5) - type: boolean + predicate: (KEY._col0 < 5) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out index cf5aa71..6a462a3 100644 --- ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out +++ ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out @@ -32,9 +32,6 @@ select key, count(*) where src.value in ('val_400', 'val_500') AND key in (400, 450) group by key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (AND (TOK_FUNCTION in (. (TOK_TABLE_OR_COL src) value) 'val_100' 'val_200' 'val_300') (TOK_FUNCTION in (TOK_TABLE_OR_COL key) 100 150 200))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (AND (TOK_FUNCTION in (. (TOK_TABLE_OR_COL src) value) 'val_400' 'val_500') (TOK_FUNCTION in (TOK_TABLE_OR_COL key) 400 450))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -45,86 +42,63 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200)) or ((value) IN ('val_400', 'val_500') and (key) IN (400, 450))) - type: boolean + predicate: (((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200)) or ((value) IN ('val_400', 'val_500') and (key) IN (400, 450))) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: -1 - value expressions: - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) - type: boolean + predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count() + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 Filter Operator - predicate: - expr: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) - type: boolean + predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count() + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -240,9 +214,6 @@ POSTHOOK: Lineage: e1.count EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: e1.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: e2.count EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: e2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (or (or (= (+ (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) key)) 200) (= (- (. (TOK_TABLE_OR_COL src) key) 100) 100)) (AND (= (. (TOK_TABLE_OR_COL src) key) 300) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL VALUE))))) (TOK_GROUPBY (TOK_TABLE_OR_COL value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (or (= (+ (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) key)) 400) (AND (= (- (. (TOK_TABLE_OR_COL src) key) 100) 500) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL VALUE))))) (TOK_GROUPBY (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -253,86 +224,63 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((key + key) = 200) or ((key - 100) = 100)) or ((key = 300) and value is not null)) or (((key + key) = 400) or (((key - 100) = 500) and value is not null))) - type: boolean + predicate: (((((key + key) = 200) or ((key - 100) = 100)) or ((key = 300) and value is not null)) or (((key + key) = 400) or (((key - 100) = 500) and value is not null))) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: value (type: string) sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: -1 - value expressions: - expr: key - type: string + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((VALUE._col0 + VALUE._col0) = 200) or ((VALUE._col0 - 100) = 100)) or ((VALUE._col0 = 300) and KEY._col0 is not null)) - type: boolean + predicate: ((((VALUE._col0 + VALUE._col0) = 200) or ((VALUE._col0 - 100) = 100)) or ((VALUE._col0 = 300) and KEY._col0 is not null)) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count() + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 Filter Operator - predicate: - expr: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) - type: boolean + predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) + Statistics: Num rows: 21 Data size: 4208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count() + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2003 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2003 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 10 Data size: 2003 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -464,9 +412,6 @@ POSTHOOK: Lineage: e2.count EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: e2.count EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: e2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: e2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (AND (TOK_FUNCTION in (. (TOK_TABLE_OR_COL src) value) 'val_100' 'val_200' 'val_300') (TOK_FUNCTION in (TOK_TABLE_OR_COL key) 100 150 200))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (AND (TOK_FUNCTION in (. (TOK_TABLE_OR_COL src) value) 'val_400' 'val_500') (TOK_FUNCTION in (TOK_TABLE_OR_COL key) 400 450))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -477,86 +422,63 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200)) or ((value) IN ('val_400', 'val_500') and (key) IN (400, 450))) - type: boolean + predicate: (((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200)) or ((value) IN ('val_400', 'val_500') and (key) IN (400, 450))) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: -1 - value expressions: - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) - type: boolean + predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count() + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 Filter Operator - predicate: - expr: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) - type: boolean + predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count() + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -704,9 +626,6 @@ POSTHOOK: Lineage: e2.count EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: e2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: e2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: e2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (or (or (= (+ (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) key)) 200) (= (- (. (TOK_TABLE_OR_COL src) key) 100) 100)) (AND (= (. (TOK_TABLE_OR_COL src) key) 300) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL VALUE))))) (TOK_GROUPBY (TOK_TABLE_OR_COL value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (or (= (+ (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) key)) 400) (AND (= (- (. (TOK_TABLE_OR_COL src) key) 100) 500) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL VALUE))))) (TOK_GROUPBY (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -717,86 +636,63 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((key + key) = 200) or ((key - 100) = 100)) or ((key = 300) and value is not null)) or (((key + key) = 400) or (((key - 100) = 500) and value is not null))) - type: boolean + predicate: (((((key + key) = 200) or ((key - 100) = 100)) or ((key = 300) and value is not null)) or (((key + key) = 400) or (((key - 100) = 500) and value is not null))) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: value (type: string) sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: -1 - value expressions: - expr: key - type: string + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((VALUE._col0 + VALUE._col0) = 200) or ((VALUE._col0 - 100) = 100)) or ((VALUE._col0 = 300) and KEY._col0 is not null)) - type: boolean + predicate: ((((VALUE._col0 + VALUE._col0) = 200) or ((VALUE._col0 - 100) = 100)) or ((VALUE._col0 = 300) and KEY._col0 is not null)) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count() + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 Filter Operator - predicate: - expr: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) - type: boolean + predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) + Statistics: Num rows: 21 Data size: 4208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count() + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2003 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2003 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 10 Data size: 2003 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_position.q.out ql/src/test/results/clientpositive/groupby_position.q.out index a0de1da..6c96be0 100644 --- ql/src/test/results/clientpositive/groupby_position.q.out +++ ql/src/test/results/clientpositive/groupby_position.q.out @@ -22,9 +22,6 @@ FROM SRC INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testTable1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testTable2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key) (. (TOK_TABLE_OR_COL SRC) value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -36,97 +33,62 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 20) - type: boolean + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT substr(value, 5)) - bucketGroup: false - keys: - expr: key - type: string - expr: substr(value, 5) - type: string + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Filter Operator - predicate: - expr: (key < 20) - type: boolean + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT substr(value, 5)) - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string - expr: substr(value, 5) - type: string + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), value (type: string), substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -148,51 +110,28 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col2:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -298,9 +237,6 @@ POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testTable1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testTable2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) value) (. (TOK_TABLE_OR_COL SRC) key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -312,97 +248,62 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 20) - type: boolean + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT substr(value, 5)) - bucketGroup: false - keys: - expr: key - type: string - expr: substr(value, 5) - type: string + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Filter Operator - predicate: - expr: (key < 20) - type: boolean + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT substr(value, 5)) - bucketGroup: false - keys: - expr: value - type: string - expr: key - type: string - expr: substr(value, 5) - type: string + aggregations: count(DISTINCT substr(value, 5)) + keys: value (type: string), key (type: string), substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: bigint + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -424,51 +325,28 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col2:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col1) - type: int - expr: _col0 - type: string - expr: _col2 - type: bigint + expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -598,9 +476,6 @@ POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) value)) (TOK_WHERE (<= (. (TOK_TABLE_OR_COL b) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL t) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (. (TOK_TABLE_OR_COL t) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL t) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -609,60 +484,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t:b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key <= 20) - type: boolean + predicate: (key <= 20) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -670,27 +527,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: bigint - expr: _col0 - type: string + key expressions: _col1 (type: bigint), _col0 (type: string) sort order: -+ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -778,9 +627,6 @@ POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) src1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) value) c2) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL src1) value) 5)) c3)) (TOK_WHERE (and (> (. (TOK_TABLE_OR_COL src1) key) 10) (< (. (TOK_TABLE_OR_COL src1) key) 20))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src1) value)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) src2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) key) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) value) c4)) (TOK_WHERE (and (> (. (TOK_TABLE_OR_COL src2) key) 15) (< (. (TOK_TABLE_OR_COL src2) key) 25))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src2) key) (. (TOK_TABLE_OR_COL src2) value)))) b) (= (. (TOK_TABLE_OR_COL a) c1) (. (TOK_TABLE_OR_COL b) c3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) c1) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) c2) c2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) c3) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) c4) c4)))) c)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL c1)) (TOK_SELEXPR (TOK_TABLE_OR_COL c2)) (TOK_SELEXPR (TOK_TABLE_OR_COL c3)) (TOK_SELEXPR (TOK_TABLE_OR_COL c4))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL c1)) (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL c2)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL c3)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL c4))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -791,74 +637,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c:a:src1 + Map Operator Tree: TableScan alias: src1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT substr(value, 5)) - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string - expr: substr(value, 5) - type: string + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), value (type: string), substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col2:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -866,39 +680,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - c:$INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - c:$INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -906,22 +702,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -929,35 +717,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: --++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -965,63 +737,39 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - c:b:src2 + Map Operator Tree: TableScan alias: src2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 15) and (key < 25)) - type: boolean + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/groupby_ppd.q.out ql/src/test/results/clientpositive/groupby_ppd.q.out index 017d7e7..a81c772 100644 --- ql/src/test/results/clientpositive/groupby_ppd.q.out +++ ql/src/test/results/clientpositive/groupby_ppd.q.out @@ -9,9 +9,6 @@ PREHOOK: query: explain select * from (select foo, bar from (select bar, foo fro PREHOOK: type: QUERY POSTHOOK: query: explain select * from (select foo, bar from (select bar, foo from invites c union all select bar, foo from invites d) b) a group by bar, foo having bar=1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME invites) c)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bar)) (TOK_SELEXPR (TOK_TABLE_OR_COL foo))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME invites) d)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bar)) (TOK_SELEXPR (TOK_TABLE_OR_COL foo)))))) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL foo)) (TOK_SELEXPR (TOK_TABLE_OR_COL bar))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_GROUPBY (TOK_TABLE_OR_COL bar) (TOK_TABLE_OR_COL foo)) (TOK_HAVING (= (TOK_TABLE_OR_COL bar) 1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -19,115 +16,72 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a-subquery1:b-subquery1:c + Map Operator Tree: TableScan - alias: c + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (bar = 1) - type: boolean + predicate: (bar = 1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: bar - type: int - expr: foo - type: int + expressions: bar (type: int), foo (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: int - expr: _col0 - type: int + expressions: _col1 (type: int), _col0 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col1 - type: int - expr: _col0 - type: int + keys: _col1 (type: int), _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - a-subquery2:b-subquery2:d + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan - alias: d + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (bar = 1) - type: boolean + predicate: (bar = 1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: bar - type: int - expr: foo - type: int + expressions: bar (type: int), foo (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: int - expr: _col0 - type: int + expressions: _col1 (type: int), _col0 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col1 - type: int - expr: _col0 - type: int + keys: _col1 (type: int), _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int + expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_ppr.q.out ql/src/test/results/clientpositive/groupby_ppr.q.out index e2ffbbc..c8eed7c 100644 --- ql/src/test/results/clientpositive/groupby_ppr.q.out +++ ql/src/test/results/clientpositive/groupby_ppr.q.out @@ -18,7 +18,75 @@ WHERE src.ds = '2008-04-08' GROUP BY substr(src.key,1,1) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) ds) '2008-04-08')) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest1 + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + key + 1 + 1 + TOK_SELEXPR + TOK_FUNCTIONDI + count + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + value + 5 + TOK_SELEXPR + TOK_FUNCTION + concat + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + key + 1 + 1 + TOK_FUNCTION + sum + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + value + 5 + TOK_WHERE + = + . + TOK_TABLE_OR_COL + src + ds + '2008-04-08' + TOK_GROUPBY + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + key + 1 + 1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -28,34 +96,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(key, 1, 1) - type: string - expr: substr(value, 5) - type: string + key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) sort order: ++ - Map-reduce partition columns: - expr: substr(key, 1, 1) - type: string - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: substr(key, 1, 1) (type: string) + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE tag: -1 Path -> Alias: #### A masked pattern was here #### @@ -150,35 +204,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out index a153a5c..31d6dec 100644 --- ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out @@ -18,7 +18,92 @@ WHERE src.ds = '2008-04-08' GROUP BY substr(src.key,1,1) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) ds) '2008-04-08')) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest1 + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + key + 1 + 1 + TOK_SELEXPR + TOK_FUNCTIONDI + count + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + value + 5 + TOK_SELEXPR + TOK_FUNCTION + concat + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + key + 1 + 1 + TOK_FUNCTION + sum + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + value + 5 + TOK_SELEXPR + TOK_FUNCTIONDI + sum + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + value + 5 + TOK_SELEXPR + TOK_FUNCTIONDI + count + . + TOK_TABLE_OR_COL + src + value + TOK_WHERE + = + . + TOK_TABLE_OR_COL + src + ds + '2008-04-08' + TOK_GROUPBY + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + key + 1 + 1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -28,36 +113,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: substr(key, 1, 1) - type: string - expr: substr(value, 5) - type: string - expr: value - type: string + key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) sort order: +++ - Map-reduce partition columns: - expr: substr(key, 1, 1) - type: string - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: substr(key, 1, 1) (type: string) + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE tag: -1 Path -> Alias: #### A masked pattern was here #### @@ -152,41 +221,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: sum(KEY._col1:0._col0) - expr: sum(DISTINCT KEY._col1:1._col0) - expr: count(DISTINCT KEY._col1:2._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(DISTINCT KEY._col1:2._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: concat(_col0, _col2) - type: string - expr: UDFToInteger(_col3) - type: int - expr: UDFToInteger(_col4) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/groupby_resolution.q.out ql/src/test/results/clientpositive/groupby_resolution.q.out index 0a78619..7970df5 100644 --- ql/src/test/results/clientpositive/groupby_resolution.q.out +++ ql/src/test/results/clientpositive/groupby_resolution.q.out @@ -2,9 +2,6 @@ PREHOOK: query: explain select key, count(*) from src b group by b.key PREHOOK: type: QUERY POSTHOOK: query: explain select key, count(*) from src b group by b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -12,44 +9,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: -1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count() + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -63,9 +49,6 @@ PREHOOK: query: explain select b.key, count(*) from src b group by key PREHOOK: type: QUERY POSTHOOK: query: explain select b.key, count(*) from src b group by key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -73,44 +56,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: -1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count() + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -124,9 +96,6 @@ PREHOOK: query: explain select key, count(*) from src b group by b.key PREHOOK: type: QUERY POSTHOOK: query: explain select key, count(*) from src b group by b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -135,37 +104,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count() + keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -173,41 +133,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -221,9 +168,6 @@ PREHOOK: query: explain select b.key, count(*) from src b group by key PREHOOK: type: QUERY POSTHOOK: query: explain select b.key, count(*) from src b group by key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -232,37 +176,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count() + keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -270,41 +205,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -318,9 +240,6 @@ PREHOOK: query: explain select key, count(*) from src b group by b.key PREHOOK: type: QUERY POSTHOOK: query: explain select key, count(*) from src b group by b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -328,56 +247,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -391,9 +294,6 @@ PREHOOK: query: explain select b.key, count(*) from src b group by key PREHOOK: type: QUERY POSTHOOK: query: explain select b.key, count(*) from src b group by key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -401,56 +301,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -464,9 +348,6 @@ PREHOOK: query: explain select key, count(*) from src b group by b.key PREHOOK: type: QUERY POSTHOOK: query: explain select key, count(*) from src b group by b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -475,49 +356,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -525,41 +392,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -573,9 +427,6 @@ PREHOOK: query: explain select b.key, count(*) from src b group by key PREHOOK: type: QUERY POSTHOOK: query: explain select b.key, count(*) from src b group by key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -584,49 +435,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -634,41 +471,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -777,9 +601,6 @@ from src b group by b.key having key < '12' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_FUNCTIONSTAR count)))))))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key)) (TOK_HAVING (< (TOK_TABLE_OR_COL key) '12')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -789,53 +610,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < '12') - type: boolean + predicate: (key < '12') (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -843,34 +649,23 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -878,40 +673,26 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: 0 - type: int - expr: _col1 - type: bigint + key expressions: 0 (type: int), _col1 (type: bigint) sort order: ++ - Map-reduce partition columns: - expr: 0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE PTF Operator + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _wcol0 - type: int + expressions: _col0 (type: string), _col1 (type: bigint), _wcol0 (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -960,9 +741,6 @@ POSTHOOK: query: -- cluster by EXPLAIN SELECT x.key, x.value as key FROM SRC x CLUSTER BY key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) key)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -970,36 +748,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_rollup1.q.out ql/src/test/results/clientpositive/groupby_rollup1.q.out index bdb517a..1f2e757 100644 --- ql/src/test/results/clientpositive/groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/groupby_rollup1.q.out @@ -15,9 +15,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -25,76 +22,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: val - type: string - expr: '0' - type: string + aggregations: count(1) + keys: key (type: string), val (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -132,9 +93,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL val)))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -142,70 +100,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT val) - bucketGroup: false - keys: - expr: key - type: string - expr: '0' - type: string - expr: val - type: string + aggregations: count(DISTINCT val) + keys: key (type: string), '0' (type: string), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col2:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -237,9 +165,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -248,63 +173,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: val - type: string - expr: '0' - type: string + aggregations: count(1) + keys: key (type: string), val (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -312,53 +209,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -396,9 +268,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL val)))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -406,68 +275,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT val) - bucketGroup: false - keys: - expr: key - type: string - expr: '0' - type: string - expr: val - type: string + aggregations: count(DISTINCT val) + keys: key (type: string), '0' (type: string), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col2:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -513,9 +354,6 @@ FROM T1 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with rollup INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with rollup POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME T2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME T3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION sum 1))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -529,90 +367,51 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: val - type: string - expr: '0' - type: string + aggregations: count(1) + keys: key (type: string), val (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: sum(1) - bucketGroup: false - keys: - expr: key - type: string - expr: val - type: string - expr: '0' - type: string + aggregations: sum(1) + keys: key (type: string), val (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -620,53 +419,28 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: UDFToInteger(_col3) - type: int + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -688,42 +462,23 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -731,53 +486,28 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: UDFToInteger(_col3) - type: int + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_sort_1.q.out ql/src/test/results/clientpositive/groupby_sort_1.q.out index 3fd4cc7..b1f7f41 100644 --- ql/src/test/results/clientpositive/groupby_sort_1.q.out +++ ql/src/test/results/clientpositive/groupby_sort_1.q.out @@ -47,7 +47,29 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl1 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -62,47 +84,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: final outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -210,8 +216,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -277,8 +282,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -403,7 +407,34 @@ POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type: POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl2 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + val + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + TOK_TABLE_OR_COL + val + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -413,53 +444,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: val - type: string + aggregations: count(1) + keys: key (type: string), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col2 - type: bigint + value expressions: _col2 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -514,36 +520,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string - expr: UDFToInteger(_col2) - type: int + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -645,7 +636,44 @@ POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:str POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + val + subq1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl1 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -660,47 +688,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:t1 + Map Operator Tree: TableScan alias: t1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: final outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -818,8 +830,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -900,8 +911,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -1047,7 +1057,46 @@ POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:str POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) k) (TOK_SELEXPR (TOK_TABLE_OR_COL val) v)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL k)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + k + TOK_SELEXPR + TOK_TABLE_OR_COL + val + v + subq1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl1 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + k + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + k + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1062,47 +1111,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:t1 + Map Operator Tree: TableScan alias: t1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: final outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1220,8 +1253,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -1302,8 +1334,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -1473,7 +1504,32 @@ POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:str POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl3))) (TOK_SELECT (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY 1 (TOK_TABLE_OR_COL key)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl3 + TOK_SELECT + TOK_SELEXPR + 1 + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + 1 + TOK_TABLE_OR_COL + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1488,51 +1544,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: 1 - type: int - expr: key - type: string + aggregations: count(1) + keys: 1 (type: int), key (type: string) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1640,8 +1676,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -1707,8 +1742,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -1873,7 +1907,37 @@ POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl4))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) 1 (TOK_TABLE_OR_COL val)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl4 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + 1 + TOK_SELEXPR + TOK_TABLE_OR_COL + val + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + 1 + TOK_TABLE_OR_COL + val + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1883,59 +1947,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: 1 - type: int - expr: val - type: string + aggregations: count(1) + keys: key (type: string), 1 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col3 - type: bigint + value expressions: _col3 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1990,40 +2023,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: int - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: UDFToInteger(_col3) - type: int + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2158,7 +2172,38 @@ POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:str POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) 1)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (+ (TOK_TABLE_OR_COL key) 1)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl3 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + + + TOK_TABLE_OR_COL + key + 1 + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + + + TOK_TABLE_OR_COL + key + 1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2168,51 +2213,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: (key + 1) - type: double + aggregations: count(1) + keys: key (type: string), (key + 1) (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: double + key expressions: _col0 (type: string), _col1 (type: double) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: double - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string), _col1 (type: double) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col2 - type: bigint + value expressions: _col2 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2267,36 +2289,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: double + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2459,7 +2466,56 @@ POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:str POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL cnt)))) (TOK_GROUPBY (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key))))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + cnt + TOK_GROUPBY + TOK_TABLE_OR_COL + key + subq1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl1 + TOK_SELECT + TOK_SELEXPR + + + TOK_TABLE_OR_COL + key + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + sum + TOK_TABLE_OR_COL + cnt + TOK_GROUPBY + + + TOK_TABLE_OR_COL + key + TOK_TABLE_OR_COL + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2469,65 +2525,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:t1 + Map Operator Tree: TableScan alias: t1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: final outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col1) - bucketGroup: false - keys: - expr: (_col0 + _col0) - type: double + aggregations: sum(_col1) + keys: (_col0 + _col0) (type: double) mode: hash outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - Map-reduce partition columns: - expr: _col0 - type: double - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col1 - type: bigint + value expressions: _col1 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2582,32 +2611,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: double + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2778,7 +2796,61 @@ POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:str POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_UNION + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + subq1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl1 + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2793,59 +2865,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:subq1-subquery1:t1 + Map Operator Tree: TableScan alias: t1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: final outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Union - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2870,58 +2920,36 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - null-subquery2:subq1-subquery2:t1 TableScan alias: t1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: final outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Union - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3039,8 +3067,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -3121,8 +3148,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -3337,7 +3363,68 @@ POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:str POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)) key) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_UNION + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + + + TOK_TABLE_OR_COL + key + TOK_TABLE_OR_COL + key + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + + + TOK_TABLE_OR_COL + key + TOK_TABLE_OR_COL + key + subq1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl1 + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-9 is a root stage @@ -3353,45 +3440,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:subq1-subquery2:t1 + Map Operator Tree: TableScan alias: t1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: (key + key) - type: double + aggregations: count(1) + keys: (key + key) (type: double) mode: hash outputColumnNames: _col0, _col1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - Map-reduce partition columns: - expr: _col0 - type: double - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col1 - type: bigint + value expressions: _col1 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3446,25 +3516,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: double + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: bigint + expressions: _col0 (type: double), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -3485,105 +3545,37 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - TableScan - GatherStats: false - Union - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE - Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int - outputColumnNames: _col0, _col1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - null-subquery1:subq1-subquery1:t1 + Map Operator Tree: TableScan alias: t1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: final outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToDouble(_col0) - type: double - expr: _col1 - type: bigint + expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Union - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3608,6 +3600,44 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + TableScan + GatherStats: false + Union + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 1 + numRows 10 + rawDataSize 30 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 40 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3722,8 +3752,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -3804,8 +3833,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -4026,7 +4054,87 @@ POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:str POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL subq1) cnt) (. (TOK_TABLE_OR_COL subq2) cnt)))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + cnt + TOK_GROUPBY + TOK_TABLE_OR_COL + key + subq1 + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + cnt + TOK_GROUPBY + TOK_TABLE_OR_COL + key + subq2 + = + . + TOK_TABLE_OR_COL + subq1 + key + . + TOK_TABLE_OR_COL + subq2 + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl1 + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + subq1 + key + TOK_SELEXPR + + + . + TOK_TABLE_OR_COL + subq1 + cnt + . + TOK_TABLE_OR_COL + subq2 + cnt + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -4036,103 +4144,57 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:t1 + Map Operator Tree: TableScan alias: t1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: final outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - subq2:t1 + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan alias: t1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: final outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: - expr: _col1 - type: bigint + value expressions: _col1 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4192,26 +4254,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 - Statistics: - numRows: 3 dataSize: 13 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger((_col1 + _col3)) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 13 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 3 dataSize: 13 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4402,7 +4456,76 @@ POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:str POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + subq1 + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + val + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + TOK_TABLE_OR_COL + val + subq2 + = + . + TOK_TABLE_OR_COL + subq1 + key + . + TOK_TABLE_OR_COL + subq2 + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-2 is a root stage @@ -4412,53 +4535,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - subq2:t1 + Map Operator Tree: TableScan alias: t1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: val - type: string + aggregations: count(1) + keys: key (type: string), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col2 - type: bigint + value expressions: _col2 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4513,29 +4611,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -4556,77 +4640,41 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME - TableScan - GatherStats: false - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint - subq1:t1 + Map Operator Tree: TableScan alias: t1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: final outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + value expressions: _col0 (type: string), _col1 (type: bigint) + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4707,32 +4755,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: - numRows: 3 dataSize: 13 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: - numRows: 3 dataSize: 13 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 3 dataSize: 13 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4872,7 +4906,29 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T2 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl1 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -4882,45 +4938,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t2 + Map Operator Tree: TableScan alias: t2 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) + aggregations: count(1) bucketGroup: true - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col1 - type: bigint + value expressions: _col1 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4975,32 +5015,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -5191,7 +5220,37 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl4))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) 1 (TOK_TABLE_OR_COL val)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T2 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl4 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + 1 + TOK_SELEXPR + TOK_TABLE_OR_COL + val + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + 1 + TOK_TABLE_OR_COL + val + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -5206,57 +5265,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t2 + Map Operator Tree: TableScan alias: t2 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: 1 - type: int - expr: val - type: string + aggregations: count(1) + keys: key (type: string), 1 (type: int), val (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: UDFToInteger(_col3) - type: int + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -5374,8 +5407,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -5456,8 +5488,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -5732,7 +5763,40 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl5))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR 2) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) 1 (TOK_TABLE_OR_COL val) 2))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T2 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl5 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + 1 + TOK_SELEXPR + TOK_TABLE_OR_COL + val + TOK_SELEXPR + 2 + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + 1 + TOK_TABLE_OR_COL + val + 2 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -5747,61 +5811,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t2 + Map Operator Tree: TableScan alias: t2 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: 1 - type: int - expr: val - type: string - expr: 2 - type: int + aggregations: count(1) + keys: key (type: string), 1 (type: int), val (type: string), 2 (type: int) mode: final outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col3 - type: int - expr: UDFToInteger(_col4) - type: int + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -5909,8 +5943,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -5976,8 +6009,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -6214,7 +6246,57 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 constant) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl4))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL constant)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL constant) (TOK_TABLE_OR_COL val)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + 1 + constant + TOK_SELEXPR + TOK_TABLE_OR_COL + val + subq + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl4 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + constant + TOK_SELEXPR + TOK_TABLE_OR_COL + val + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + TOK_TABLE_OR_COL + constant + TOK_TABLE_OR_COL + val + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -6229,59 +6311,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq:t2 + Map Operator Tree: TableScan alias: t2 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: 1 - type: int - expr: val - type: string + expressions: key (type: string), 1 (type: int), val (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: int), _col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: UDFToInteger(_col3) - type: int + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -6399,8 +6453,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -6481,8 +6534,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -6754,7 +6806,79 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 constant) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL constant) constant2) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR 2 constant3)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl4))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL constant3)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL constant3) (TOK_TABLE_OR_COL val)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + 1 + constant + TOK_SELEXPR + TOK_TABLE_OR_COL + val + subq + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + constant + constant2 + TOK_SELEXPR + TOK_TABLE_OR_COL + val + TOK_SELEXPR + 2 + constant3 + subq2 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl4 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + constant3 + TOK_SELEXPR + TOK_TABLE_OR_COL + val + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + TOK_TABLE_OR_COL + constant3 + TOK_TABLE_OR_COL + val + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -6769,59 +6893,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:subq:t2 + Map Operator Tree: TableScan alias: t2 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: 2 - type: int - expr: val - type: string + expressions: key (type: string), 2 (type: int), val (type: string) outputColumnNames: _col0, _col3, _col2 - Statistics: - numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col3 - type: int - expr: _col2 - type: string + aggregations: count(1) + keys: _col0 (type: string), _col3 (type: int), _col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: UDFToInteger(_col3) - type: int + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -6939,8 +7035,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -7021,8 +7116,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -7411,9 +7505,6 @@ POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comm POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -7424,66 +7515,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - t2 + Map Operator Tree: TableScan alias: t2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) + aggregations: count(1) bucketGroup: true - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: val - type: string + aggregations: count(1) + keys: key (type: string), val (type: string) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string - expr: UDFToInteger(_col2) - type: int + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -7491,24 +7560,18 @@ STAGE PLANS: name: default.dest2 Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -7814,9 +7877,6 @@ POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comm POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 8)))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -7827,77 +7887,51 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - x:t2 + Map Operator Tree: TableScan alias: t2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 8) - type: boolean + predicate: (key = 8) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) + aggregations: count(1) bucketGroup: true - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string - expr: UDFToInteger(_col2) - type: int + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -7905,24 +7939,18 @@ STAGE PLANS: name: default.dest2 Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_sort_10.q.out ql/src/test/results/clientpositive/groupby_sort_10.q.out index 8ae7293..9372e4f 100644 --- ql/src/test/results/clientpositive/groupby_sort_10.q.out +++ ql/src/test/results/clientpositive/groupby_sort_10.q.out @@ -27,9 +27,6 @@ EXPLAIN select distinct key from T1 POSTHOOK: type: QUERY POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -37,30 +34,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string + keys: key (type: string) mode: final outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -112,9 +105,6 @@ POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: t1 PARTITION(ds=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t1 PARTITION(ds=2).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -122,47 +112,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 8 Data size: 60 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 8 Data size: 60 Basic stats: COMPLETE Column stats: NONE Group By Operator bucketGroup: true - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_sort_11.q.out ql/src/test/results/clientpositive/groupby_sort_11.q.out index 95d46c7..12344a5 100644 --- ql/src/test/results/clientpositive/groupby_sort_11.q.out +++ ql/src/test/results/clientpositive/groupby_sort_11.q.out @@ -27,9 +27,6 @@ EXPLAIN select count(distinct key) from T1 POSTHOOK: type: QUERY POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -37,48 +34,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT key) + aggregations: count(DISTINCT key) bucketGroup: true - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col1 - type: bigint + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -109,9 +97,6 @@ EXPLAIN select count(distinct key), count(1), count(key), sum(distinct key) from POSTHOOK: type: QUERY POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -119,66 +104,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT key) - expr: count(1) - expr: count(key) - expr: sum(DISTINCT key) + aggregations: count(DISTINCT key), count(1), count(key), sum(DISTINCT key) bucketGroup: true - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: double + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: count(VALUE._col1) - expr: count(VALUE._col2) - expr: sum(VALUE._col3) - bucketGroup: false + aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), sum(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: double + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -209,9 +167,6 @@ EXPLAIN select count(distinct key), count(1), count(key), sum(distinct key) from POSTHOOK: type: QUERY POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -219,72 +174,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT key) - expr: count(1) - expr: count(key) - expr: sum(DISTINCT key) + aggregations: count(DISTINCT key), count(1), count(key), sum(DISTINCT key) bucketGroup: true - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: count(VALUE._col1) - expr: count(VALUE._col2) - expr: sum(DISTINCT KEY._col1:1._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), count(VALUE._col1), count(VALUE._col2), sum(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: double + expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -320,9 +244,6 @@ EXPLAIN select key, count(distinct key), count(1), count(key), sum(distinct key) POSTHOOK: type: QUERY POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -330,74 +251,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT key) - expr: count(1) - expr: count(key) - expr: sum(DISTINCT key) + aggregations: count(DISTINCT key), count(1), count(key), sum(DISTINCT key) bucketGroup: true - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: count(VALUE._col1) - expr: count(VALUE._col2) - expr: sum(DISTINCT KEY._col1:1._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0), count(VALUE._col1), count(VALUE._col2), sum(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: double + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -433,9 +321,6 @@ EXPLAIN select count(distinct key+key) from T1 POSTHOOK: type: QUERY POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -443,48 +328,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT (key + key)) - bucketGroup: false - keys: - expr: (key + key) - type: double + aggregations: count(DISTINCT (key + key)) + keys: (key + key) (type: double) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - tag: -1 - value expressions: - expr: _col1 - type: bigint + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0:0._col0) - bucketGroup: false + aggregations: count(DISTINCT KEY._col0:0._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -513,9 +388,6 @@ POSTHOOK: query: EXPLAIN select count(distinct 1) from T1 POSTHOOK: type: QUERY POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -523,44 +395,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: - expr: count(DISTINCT 1) - bucketGroup: false - keys: - expr: 1 - type: int + aggregations: count(DISTINCT 1) + keys: 1 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col1 - type: bigint + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0:0._col0) - bucketGroup: false + aggregations: count(DISTINCT KEY._col0:0._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -591,9 +455,6 @@ EXPLAIN select count(distinct key) from T1 POSTHOOK: type: QUERY POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -601,36 +462,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - tag: -1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0:0._col0) - bucketGroup: false + aggregations: count(DISTINCT KEY._col0:0._col0) mode: complete outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_sort_2.q.out ql/src/test/results/clientpositive/groupby_sort_2.q.out index 11d18e2..559e98f 100644 --- ql/src/test/results/clientpositive/groupby_sort_2.q.out +++ ql/src/test/results/clientpositive/groupby_sort_2.q.out @@ -44,9 +44,6 @@ SELECT val, count(1) FROM T1 GROUP BY val POSTHOOK: type: QUERY POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL val)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -55,56 +52,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: val - type: string + expressions: val (type: string) outputColumnNames: val + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) + aggregations: count(1) bucketGroup: true - keys: - expr: val - type: string + keys: val (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_sort_3.q.out ql/src/test/results/clientpositive/groupby_sort_3.q.out index 531c390..a8fc962 100644 --- ql/src/test/results/clientpositive/groupby_sort_3.q.out +++ ql/src/test/results/clientpositive/groupby_sort_3.q.out @@ -42,9 +42,6 @@ SELECT key, val, count(1) FROM T1 GROUP BY key, val POSTHOOK: type: QUERY POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -58,40 +55,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: val - type: string + aggregations: count(1) + keys: key (type: string), val (type: string) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -122,12 +106,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -136,12 +118,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -213,9 +193,6 @@ POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:stri POSTHOOK: Lineage: outputtbl1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -229,34 +206,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -287,12 +257,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -301,12 +269,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_sort_4.q.out ql/src/test/results/clientpositive/groupby_sort_4.q.out index 089b9fa..f1fb04e 100644 --- ql/src/test/results/clientpositive/groupby_sort_4.q.out +++ ql/src/test/results/clientpositive/groupby_sort_4.q.out @@ -44,9 +44,6 @@ SELECT key, count(1) FROM T1 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -55,56 +52,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) + aggregations: count(1) bucketGroup: true - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -180,9 +162,6 @@ POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -191,68 +170,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: val - type: string + aggregations: count(1) + keys: key (type: string), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_sort_5.q.out ql/src/test/results/clientpositive/groupby_sort_5.q.out index 69c00f3..8156361 100644 --- ql/src/test/results/clientpositive/groupby_sort_5.q.out +++ ql/src/test/results/clientpositive/groupby_sort_5.q.out @@ -46,9 +46,6 @@ SELECT key, val, count(1) FROM T1 GROUP BY key, val POSTHOOK: type: QUERY POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -62,40 +59,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: val - type: string + aggregations: count(1) + keys: key (type: string), val (type: string) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -126,12 +110,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -140,12 +122,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -266,9 +246,6 @@ POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comm POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -282,40 +259,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: val - type: string + aggregations: count(1) + keys: key (type: string), val (type: string) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -346,12 +310,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -360,12 +322,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -540,9 +500,6 @@ POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comm POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -551,56 +508,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) + aggregations: count(1) bucketGroup: true - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_sort_6.q.out ql/src/test/results/clientpositive/groupby_sort_6.q.out index 0a01668..3362408 100644 --- ql/src/test/results/clientpositive/groupby_sort_6.q.out +++ ql/src/test/results/clientpositive/groupby_sort_6.q.out @@ -19,7 +19,34 @@ INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl1 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_WHERE + = + TOK_TABLE_OR_COL + ds + '1' + TOK_GROUPBY + TOK_TABLE_OR_COL + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -29,81 +56,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 - TableScan - alias: t1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (ds = '1') - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Select Operator - expressions: - expr: key - type: string - outputColumnNames: key - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - mode: hash - outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - tag: -1 - value expressions: - expr: _col1 - type: bigint Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -193,7 +163,34 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl1 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_WHERE + = + TOK_TABLE_OR_COL + ds + '1' + TOK_GROUPBY + TOK_TABLE_OR_COL + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -203,81 +200,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 - TableScan - alias: t1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (ds = '1') - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Select Operator - expressions: - expr: key - type: string - outputColumnNames: key - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - mode: hash - outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - tag: -1 - value expressions: - expr: _col1 - type: bigint Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -376,7 +316,34 @@ POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + T1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + outputTbl1 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_WHERE + = + TOK_TABLE_OR_COL + ds + '2' + TOK_GROUPBY + TOK_TABLE_OR_COL + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -386,45 +353,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 - Statistics: - numRows: 0 dataSize: 30 basicStatsState: PARTIAL colStatsState: NONE + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 0 dataSize: 30 basicStatsState: PARTIAL colStatsState: NONE + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 30 basicStatsState: PARTIAL colStatsState: NONE + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 0 dataSize: 30 basicStatsState: PARTIAL colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: - expr: _col1 - type: bigint + value expressions: _col1 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -474,32 +424,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/groupby_sort_7.q.out ql/src/test/results/clientpositive/groupby_sort_7.q.out index 44630a0..f6bcf56 100644 --- ql/src/test/results/clientpositive/groupby_sort_7.q.out +++ ql/src/test/results/clientpositive/groupby_sort_7.q.out @@ -49,9 +49,6 @@ SELECT key, val, count(1) FROM T1 where ds = '1' GROUP BY key, val POSTHOOK: type: QUERY POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -65,40 +62,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: key, val + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: val - type: string + aggregations: count(1) + keys: key (type: string), val (type: string) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -129,12 +113,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -143,12 +125,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_sort_8.q.out ql/src/test/results/clientpositive/groupby_sort_8.q.out index 4d3763f..975c4d1 100644 --- ql/src/test/results/clientpositive/groupby_sort_8.q.out +++ ql/src/test/results/clientpositive/groupby_sort_8.q.out @@ -40,9 +40,6 @@ select count(distinct key) from T1 POSTHOOK: type: QUERY POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -50,48 +47,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT key) + aggregations: count(DISTINCT key) bucketGroup: true - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col1 - type: bigint + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -124,9 +112,6 @@ select count(distinct key) from T1 POSTHOOK: type: QUERY POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -134,48 +119,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT key) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(DISTINCT key) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col1 - type: bigint + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0:0._col0) - bucketGroup: false + aggregations: count(DISTINCT KEY._col0:0._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_sort_9.q.out ql/src/test/results/clientpositive/groupby_sort_9.q.out index 4fddfda..db0d2e3 100644 --- ql/src/test/results/clientpositive/groupby_sort_9.q.out +++ ql/src/test/results/clientpositive/groupby_sort_9.q.out @@ -54,9 +54,6 @@ POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, t POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1 PARTITION(ds=2).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1 PARTITION(ds=2).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -64,56 +61,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) + aggregations: count(1) bucketGroup: true - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_sort_test_1.q.out ql/src/test/results/clientpositive/groupby_sort_test_1.q.out index 02b204a..1773686 100644 --- ql/src/test/results/clientpositive/groupby_sort_test_1.q.out +++ ql/src/test/results/clientpositive/groupby_sort_test_1.q.out @@ -44,9 +44,6 @@ SELECT key, count(1) FROM T1 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -55,56 +52,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/having.q.out ql/src/test/results/clientpositive/having.q.out index 50b1f6a..a9a1b25 100644 --- ql/src/test/results/clientpositive/having.q.out +++ ql/src/test/results/clientpositive/having.q.out @@ -2,9 +2,6 @@ PREHOOK: query: EXPLAIN SELECT count(value) AS c FROM src GROUP BY key HAVING c PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT count(value) AS c FROM src GROUP BY key HAVING c > 3 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL value)) c)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_HAVING (> (TOK_TABLE_OR_COL c) 3)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -12,60 +9,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col1 > 3) - type: boolean + predicate: (_col1 > 3) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: bigint + expressions: _col1 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -97,9 +77,6 @@ PREHOOK: query: EXPLAIN SELECT key, max(value) AS c FROM src GROUP BY key HAVING PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key, max(value) AS c FROM src GROUP BY key HAVING key != 302 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL value)) c)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_HAVING (!= (TOK_TABLE_OR_COL key) 302)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -107,62 +84,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key <> 302) - type: boolean + predicate: (key <> 302) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: max(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: max(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -492,9 +450,6 @@ PREHOOK: query: EXPLAIN SELECT key FROM src GROUP BY key HAVING max(value) > "va PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key FROM src GROUP BY key HAVING max(value) > "val_255" POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_HAVING (> (TOK_FUNCTION max (TOK_TABLE_OR_COL value)) "val_255")))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -502,60 +457,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: max(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: max(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col1 > 'val_255') - type: boolean + predicate: (_col1 > 'val_255') (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -776,9 +714,6 @@ PREHOOK: query: EXPLAIN SELECT key FROM src where key > 300 GROUP BY key HAVING PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key FROM src where key > 300 GROUP BY key HAVING max(value) > "val_255" POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (> (TOK_TABLE_OR_COL key) 300)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_HAVING (> (TOK_FUNCTION max (TOK_TABLE_OR_COL value)) "val_255")))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -786,64 +721,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 300) - type: boolean + predicate: (key > 300) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: max(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: max(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col1 > 'val_255') - type: boolean + predicate: (_col1 > 'val_255') (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -990,9 +907,6 @@ PREHOOK: query: EXPLAIN SELECT key, max(value) FROM src GROUP BY key HAVING max( PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT key, max(value) FROM src GROUP BY key HAVING max(value) > "val_255" POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_HAVING (> (TOK_FUNCTION max (TOK_TABLE_OR_COL value)) "val_255")))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1000,62 +914,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: max(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: max(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col1 > 'val_255') - type: boolean + predicate: (_col1 > 'val_255') (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/implicit_cast1.q.out ql/src/test/results/clientpositive/implicit_cast1.q.out index fd55579..b451a63 100644 --- ql/src/test/results/clientpositive/implicit_cast1.q.out +++ ql/src/test/results/clientpositive/implicit_cast1.q.out @@ -13,9 +13,6 @@ SELECT implicit_test1.* FROM implicit_test1 WHERE implicit_test1.a <> 0 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME implicit_test1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME implicit_test1)))) (TOK_WHERE (<> (. (TOK_TABLE_OR_COL implicit_test1) a) 0)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -23,24 +20,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - implicit_test1 + Map Operator Tree: TableScan alias: implicit_test1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (a <> 0) - type: boolean + predicate: (a <> 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: a - type: bigint - expr: b - type: string + expressions: a (type: bigint), b (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_auto.q.out ql/src/test/results/clientpositive/index_auto.q.out index ca6ab9e..4958a60 100644 --- ql/src/test/results/clientpositive/index_auto.q.out +++ ql/src/test/results/clientpositive/index_auto.q.out @@ -67,9 +67,6 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL key) 80) (< (TOK_TABLE_OR_COL key) 100))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -77,37 +74,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 80) and (key < 100)) - type: boolean + predicate: ((key > 80) and (key < 100)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -156,9 +144,6 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL key) 80) (< (TOK_TABLE_OR_COL key) 100))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-8 depends on stages: Stage-3 , consists of Stage-5, Stage-4, Stage-6 @@ -173,27 +158,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - default__src_src_index__ + Map Operator Tree: TableScan alias: default__src_src_index__ filterExpr: - expr: ((key > 80) and (key < 100)) - type: boolean Filter Operator - predicate: - expr: ((key > 80) and (key < 100)) - type: boolean + predicate: ((key > 80) and (key < 100)) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -216,40 +191,29 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src filterExpr: - expr: ((key > 80) and (key < 100)) - type: boolean + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 80) and (key < 100)) - type: boolean + predicate: ((key > 80) and (key < 100)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -257,12 +221,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -270,12 +232,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_auto_empty.q.out ql/src/test/results/clientpositive/index_auto_empty.q.out index 8bd571e..c72b981 100644 --- ql/src/test/results/clientpositive/index_auto_empty.q.out +++ ql/src/test/results/clientpositive/index_auto_empty.q.out @@ -47,9 +47,6 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME temp))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -57,27 +54,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - temp + Map Operator Tree: TableScan alias: temp filterExpr: - expr: (key = 86) - type: boolean + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_auto_file_format.q.out ql/src/test/results/clientpositive/index_auto_file_format.q.out index ba81403..bd22334 100644 --- ql/src/test/results/clientpositive/index_auto_file_format.q.out +++ ql/src/test/results/clientpositive/index_auto_file_format.q.out @@ -23,9 +23,6 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-8 depends on stages: Stage-3 , consists of Stage-5, Stage-4, Stage-6 @@ -40,27 +37,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - default__src_src_index__ + Map Operator Tree: TableScan alias: default__src_src_index__ filterExpr: - expr: (key = 86) - type: boolean Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -83,40 +70,29 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src filterExpr: - expr: (key = 86) - type: boolean + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -124,12 +100,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -137,12 +111,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -179,9 +151,6 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-8 depends on stages: Stage-3 , consists of Stage-5, Stage-4, Stage-6 @@ -196,27 +165,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - default__src_src_index__ + Map Operator Tree: TableScan alias: default__src_src_index__ filterExpr: - expr: (key = 86) - type: boolean Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -239,40 +198,29 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src filterExpr: - expr: (key = 86) - type: boolean + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -280,12 +228,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -293,12 +239,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_auto_mult_tables.q.out ql/src/test/results/clientpositive/index_auto_mult_tables.q.out index 622a002..e13c92a 100644 --- ql/src/test/results/clientpositive/index_auto_mult_tables.q.out +++ ql/src/test/results/clientpositive/index_auto_mult_tables.q.out @@ -8,9 +8,6 @@ POSTHOOK: query: -- try the query without indexing, with manual indexing, and wi -- without indexing EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -19,44 +16,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) - type: boolean + predicate: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) - type: boolean + predicate: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -64,18 +47,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -83,25 +62,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -245,9 +218,6 @@ POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=1 POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -260,65 +230,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - tmp_index:ind0:default__srcpart_srcpart_index__ + Map Operator Tree: TableScan alias: default__srcpart_srcpart_index__ filterExpr: - expr: (((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) - type: boolean Filter Operator - predicate: - expr: (((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) - type: boolean + predicate: (((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offset - type: bigint + expressions: _bucketname (type: string), _offset (type: bigint) outputColumnNames: _col0, _col1 Group By Operator - aggregations: - expr: collect_set(_col1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: collect_set(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: array + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: array) Reduce Operator Tree: Group By Operator - aggregations: - expr: collect_set(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: collect_set(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: array + expressions: _col0 (type: string), _col1 (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -332,50 +273,32 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b filterExpr: - expr: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) - type: boolean + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) - type: boolean + predicate: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: a filterExpr: - expr: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) - type: boolean + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) - type: boolean + predicate: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -383,18 +306,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -402,25 +321,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -428,65 +341,36 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: - tmp_index:ind0:default__src_src_index__ + Map Operator Tree: TableScan alias: default__src_src_index__ filterExpr: - expr: (((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) - type: boolean Filter Operator - predicate: - expr: (((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) - type: boolean + predicate: (((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offset - type: bigint + expressions: _bucketname (type: string), _offset (type: bigint) outputColumnNames: _col0, _col1 Group By Operator - aggregations: - expr: collect_set(_col1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: collect_set(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: array + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: array) Reduce Operator Tree: Group By Operator - aggregations: - expr: collect_set(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: collect_set(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: array + expressions: _col0 (type: string), _col1 (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out index 4a4e6ff..a1c0357 100644 --- ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out +++ ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out @@ -8,9 +8,6 @@ POSTHOOK: query: -- try the query without indexing, with manual indexing, and wi -- without indexing EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -19,44 +16,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) - type: boolean + predicate: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) - type: boolean + predicate: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -64,18 +47,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -83,25 +62,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -241,9 +214,6 @@ POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(nam POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-10 depends on stages: Stage-5 , consists of Stage-7, Stage-6, Stage-8 @@ -266,27 +236,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - default__srcpart_srcpart_index__ + Map Operator Tree: TableScan alias: default__srcpart_srcpart_index__ filterExpr: - expr: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) - type: boolean Filter Operator - predicate: - expr: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) - type: boolean + predicate: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -309,50 +269,32 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b filterExpr: - expr: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) - type: boolean + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) - type: boolean + predicate: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: a filterExpr: - expr: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) - type: boolean + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) - type: boolean + predicate: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -360,18 +302,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -379,25 +317,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -405,12 +337,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -418,12 +348,10 @@ STAGE PLANS: Stage: Stage-8 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -437,27 +365,17 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: - default__src_src_index__ + Map Operator Tree: TableScan alias: default__src_src_index__ filterExpr: - expr: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) - type: boolean Filter Operator - predicate: - expr: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) - type: boolean + predicate: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -480,12 +398,10 @@ STAGE PLANS: Stage: Stage-13 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -493,12 +409,10 @@ STAGE PLANS: Stage: Stage-15 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_auto_multiple.q.out ql/src/test/results/clientpositive/index_auto_multiple.q.out index 1fe6eca..d99dc74 100644 --- ql/src/test/results/clientpositive/index_auto_multiple.q.out +++ ql/src/test/results/clientpositive/index_auto_multiple.q.out @@ -47,9 +47,6 @@ POSTHOOK: Lineage: default__src_src_key_index__.key SIMPLE [(src)src.FieldSchema POSTHOOK: Lineage: default__src_src_val_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_val_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_val_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-8 depends on stages: Stage-3 , consists of Stage-5, Stage-4, Stage-6 @@ -64,27 +61,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - default__src_src_key_index__ + Map Operator Tree: TableScan alias: default__src_src_key_index__ filterExpr: - expr: (key = 86) - type: boolean Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -107,40 +94,29 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src filterExpr: - expr: (key = 86) - type: boolean + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -148,12 +124,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -161,12 +135,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_auto_partitioned.q.out ql/src/test/results/clientpositive/index_auto_partitioned.q.out index 3623d71..1266679 100644 --- ql/src/test/results/clientpositive/index_auto_partitioned.q.out +++ ql/src/test/results/clientpositive/index_auto_partitioned.q.out @@ -37,9 +37,6 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL key) 86) (= (TOK_TABLE_OR_COL ds) '2008-04-09'))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-8 depends on stages: Stage-3 , consists of Stage-5, Stage-4, Stage-6 @@ -54,27 +51,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - default__srcpart_src_part_index__ + Map Operator Tree: TableScan alias: default__srcpart_src_part_index__ filterExpr: - expr: ((key = 86) and (ds = '2008-04-09')) - type: boolean Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -97,40 +84,29 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart filterExpr: - expr: ((key = 86) and (ds = '2008-04-09')) - type: boolean + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -138,12 +114,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -151,12 +125,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_auto_self_join.q.out ql/src/test/results/clientpositive/index_auto_self_join.q.out index b8b0df9..6945949 100644 --- ql/src/test/results/clientpositive/index_auto_self_join.q.out +++ ql/src/test/results/clientpositive/index_auto_self_join.q.out @@ -6,9 +6,6 @@ POSTHOOK: query: -- try the query without indexing, with manual indexing, and wi EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -17,45 +14,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 80) and (key < 100)) - type: boolean + predicate: ((key > 70) and (key < 90)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: value (type: string) sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: 0 - value expressions: - expr: key - type: string - b + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 70) and (key < 90)) - type: boolean + predicate: ((key > 80) and (key < 100)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: value (type: string) sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -63,18 +46,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col4 + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -82,25 +61,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -155,9 +128,6 @@ POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldS POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -170,65 +140,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - tmp_index:ind0:default__src_src_index__ + Map Operator Tree: TableScan alias: default__src_src_index__ filterExpr: - expr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) - type: boolean Filter Operator - predicate: - expr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) - type: boolean + predicate: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offset - type: bigint + expressions: _bucketname (type: string), _offset (type: bigint) outputColumnNames: _col0, _col1 Group By Operator - aggregations: - expr: collect_set(_col1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: collect_set(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: array + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: array) Reduce Operator Tree: Group By Operator - aggregations: - expr: collect_set(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: collect_set(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: array + expressions: _col0 (type: string), _col1 (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -242,51 +183,33 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b filterExpr: - expr: ((key > 80) and (key < 100)) - type: boolean + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 80) and (key < 100)) - type: boolean + predicate: ((key > 70) and (key < 90)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: value (type: string) sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: 0 - value expressions: - expr: key - type: string - b + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: b + alias: a filterExpr: - expr: ((key > 70) and (key < 90)) - type: boolean + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 70) and (key < 90)) - type: boolean + predicate: ((key > 80) and (key < 100)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: value (type: string) sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -294,18 +217,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col4 + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -313,25 +232,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -339,65 +252,36 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: - tmp_index:ind0:default__src_src_index__ + Map Operator Tree: TableScan alias: default__src_src_index__ filterExpr: - expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) - type: boolean Filter Operator - predicate: - expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) - type: boolean + predicate: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offset - type: bigint + expressions: _bucketname (type: string), _offset (type: bigint) outputColumnNames: _col0, _col1 Group By Operator - aggregations: - expr: collect_set(_col1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: collect_set(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: array + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: array) Reduce Operator Tree: Group By Operator - aggregations: - expr: collect_set(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: collect_set(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: array + expressions: _col0 (type: string), _col1 (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_auto_unused.q.out ql/src/test/results/clientpositive/index_auto_unused.q.out index 3c1d529..2819241 100644 --- ql/src/test/results/clientpositive/index_auto_unused.q.out +++ ql/src/test/results/clientpositive/index_auto_unused.q.out @@ -27,9 +27,6 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL key) 80) (< (TOK_TABLE_OR_COL key) 100))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -37,40 +34,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src filterExpr: - expr: ((key > 80) and (key < 100)) - type: boolean + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 80) and (key < 100)) - type: boolean + predicate: ((key > 80) and (key < 100)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -119,9 +105,6 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL key) 80) (< (TOK_TABLE_OR_COL key) 100))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -129,40 +112,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src filterExpr: - expr: ((key > 80) and (key < 100)) - type: boolean + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 80) and (key < 100)) - type: boolean + predicate: ((key > 80) and (key < 100)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -211,9 +183,6 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (OR (< (TOK_TABLE_OR_COL key) 10) (> (TOK_TABLE_OR_COL key) 480))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -221,40 +190,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src filterExpr: - expr: ((key < 10) or (key > 480)) - type: boolean + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 10) or (key > 480)) - type: boolean + predicate: ((key < 10) or (key > 480)) (type: boolean) + Statistics: Num rows: 18 Data size: 3607 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 18 Data size: 3607 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 18 Data size: 3607 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 18 Data size: 3607 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 18 Data size: 3607 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -348,9 +306,6 @@ POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(nam POSTHOOK: Lineage: default__src_src_val_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_val_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_val_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL key) 80) (< (TOK_TABLE_OR_COL key) 100))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -358,40 +313,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src filterExpr: - expr: ((key > 80) and (key < 100)) - type: boolean + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 80) and (key < 100)) - type: boolean + predicate: ((key > 80) and (key < 100)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -489,9 +433,6 @@ POSTHOOK: Lineage: default__src_src_val_index__.value SIMPLE [(src)src.FieldSche POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-08,hr=11)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-08,hr=11)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (AND (= (TOK_TABLE_OR_COL ds) '2008-04-09') (= (TOK_TABLE_OR_COL hr) 12)) (< (TOK_TABLE_OR_COL key) 10))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -499,48 +440,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart filterExpr: - expr: (((ds = '2008-04-09') and (hr = 12)) and (key < 10)) - type: boolean + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_auto_update.q.out ql/src/test/results/clientpositive/index_auto_update.q.out index e69db89..ae3f902 100644 --- ql/src/test/results/clientpositive/index_auto_update.q.out +++ ql/src/test/results/clientpositive/index_auto_update.q.out @@ -52,9 +52,6 @@ POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.Fi POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME temp))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-4 is a root stage Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 @@ -73,20 +70,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -114,70 +108,33 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - temp + Map Operator Tree: TableScan alias: temp Select Operator - expressions: - expr: key - type: string - expr: INPUT__FILE__NAME - type: string - expr: BLOCK__OFFSET__INSIDE__FILE - type: bigint + expressions: key (type: string), INPUT__FILE__NAME (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint) outputColumnNames: key, INPUT__FILE__NAME, BLOCK__OFFSET__INSIDE__FILE Group By Operator - aggregations: - expr: collect_set(BLOCK__OFFSET__INSIDE__FILE) - bucketGroup: false - keys: - expr: key - type: string - expr: INPUT__FILE__NAME - type: string + aggregations: collect_set(BLOCK__OFFSET__INSIDE__FILE) + keys: key (type: string), INPUT__FILE__NAME (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: array + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + value expressions: _col2 (type: array) Reduce Operator Tree: Group By Operator - aggregations: - expr: collect_set(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: collect_set(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: array + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: array) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -206,12 +163,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -220,12 +175,10 @@ STAGE PLANS: Stage: Stage-8 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -270,9 +223,6 @@ POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME temp))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-8 depends on stages: Stage-3 , consists of Stage-5, Stage-4, Stage-6 @@ -287,27 +237,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - default__temp_temp_index__ + Map Operator Tree: TableScan alias: default__temp_temp_index__ filterExpr: - expr: (key = 86) - type: boolean Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -330,27 +270,21 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - temp + Map Operator Tree: TableScan alias: temp filterExpr: - expr: (key = 86) - type: boolean + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -358,12 +292,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -371,12 +303,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_bitmap.q.out ql/src/test/results/clientpositive/index_bitmap.q.out index 297378b..2c7c050 100644 --- ql/src/test/results/clientpositive/index_bitmap.q.out +++ ql/src/test/results/clientpositive/index_bitmap.q.out @@ -8,9 +8,6 @@ PREHOOK: type: CREATEINDEX POSTHOOK: query: EXPLAIN CREATE INDEX srcpart_index_proj ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX -ABSTRACT SYNTAX TREE: - (TOK_CREATEINDEX srcpart_index_proj 'BITMAP' (TOK_TABNAME srcpart) (TOK_TABCOLNAME key) TOK_DEFERRED_REBUILDINDEX) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -168,9 +165,6 @@ POSTHOOK: Lineage: default__srcpart_srcpart_index_proj__ PARTITION(ds=2008-04-09 POSTHOOK: Lineage: default__srcpart_srcpart_index_proj__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__srcpart_srcpart_index_proj__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__srcpart_srcpart_index_proj__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_CREATEINDEX srcpart_index_proj 'BITMAP' (TOK_TABNAME srcpart) (TOK_TABCOLNAME key) TOK_DEFERRED_REBUILDINDEX) - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/index_bitmap1.q.out ql/src/test/results/clientpositive/index_bitmap1.q.out index 3c3e534..96af69f 100644 --- ql/src/test/results/clientpositive/index_bitmap1.q.out +++ ql/src/test/results/clientpositive/index_bitmap1.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATEINDEX POSTHOOK: query: EXPLAIN CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX -ABSTRACT SYNTAX TREE: - (TOK_CREATEINDEX src_index 'BITMAP' (TOK_TABNAME src) (TOK_TABCOLNAME key) TOK_DEFERRED_REBUILDINDEX) - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/index_bitmap2.q.out ql/src/test/results/clientpositive/index_bitmap2.q.out index c1a6c7b..2b9d98e 100644 --- ql/src/test/results/clientpositive/index_bitmap2.q.out +++ ql/src/test/results/clientpositive/index_bitmap2.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATEINDEX POSTHOOK: query: EXPLAIN CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX -ABSTRACT SYNTAX TREE: - (TOK_CREATEINDEX src1_index 'BITMAP' (TOK_TABNAME src) (TOK_TABCOLNAME key) TOK_DEFERRED_REBUILDINDEX) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -19,9 +16,6 @@ PREHOOK: type: CREATEINDEX POSTHOOK: query: EXPLAIN CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX -ABSTRACT SYNTAX TREE: - (TOK_CREATEINDEX src2_index 'BITMAP' (TOK_TABNAME src) (TOK_TABCOLNAME value) TOK_DEFERRED_REBUILDINDEX) - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/index_bitmap3.q.out ql/src/test/results/clientpositive/index_bitmap3.q.out index 4fb1a94..b598501 100644 --- ql/src/test/results/clientpositive/index_bitmap3.q.out +++ ql/src/test/results/clientpositive/index_bitmap3.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATEINDEX POSTHOOK: query: EXPLAIN CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX -ABSTRACT SYNTAX TREE: - (TOK_CREATEINDEX src1_index 'BITMAP' (TOK_TABNAME src) (TOK_TABCOLNAME key) TOK_DEFERRED_REBUILDINDEX) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -19,9 +16,6 @@ PREHOOK: type: CREATEINDEX POSTHOOK: query: EXPLAIN CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX -ABSTRACT SYNTAX TREE: - (TOK_CREATEINDEX src2_index 'BITMAP' (TOK_TABNAME src) (TOK_TABCOLNAME value) TOK_DEFERRED_REBUILDINDEX) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -130,9 +124,6 @@ POSTHOOK: Lineage: default__src_src2_index__._bitmaps EXPRESSION [(src)src.Field POSTHOOK: Lineage: default__src_src2_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src2_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME default__src_src1_index__))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL _bucketname) bucketname) (TOK_SELEXPR (TOK_TABLE_OR_COL _offset) offset) (TOK_SELEXPR (TOK_TABLE_OR_COL _bitmaps) bitmaps)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 0)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME default__src_src2_index__))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL _bucketname) bucketname) (TOK_SELEXPR (TOK_TABLE_OR_COL _offset) offset) (TOK_SELEXPR (TOK_TABLE_OR_COL _bitmaps) bitmaps)) (TOK_WHERE (= (TOK_TABLE_OR_COL value) "val_0")))) b) (AND (= (. (TOK_TABLE_OR_COL a) bucketname) (. (TOK_TABLE_OR_COL b) bucketname)) (= (. (TOK_TABLE_OR_COL a) offset) (. (TOK_TABLE_OR_COL b) offset))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) bucketname) _bucketname) (TOK_SELEXPR (TOK_FUNCTION COLLECT_SET (. (TOK_TABLE_OR_COL a) offset)) _offsets)) (TOK_WHERE (NOT (TOK_FUNCTION EWAH_BITMAP_EMPTY (TOK_FUNCTION EWAH_BITMAP_AND (. (TOK_TABLE_OR_COL a) bitmaps) (. (TOK_TABLE_OR_COL b) bitmaps))))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) bucketname)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -141,75 +132,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:default__src_src1_index__ + Map Operator Tree: TableScan alias: default__src_src1_index__ + Statistics: Num rows: 500 Data size: 59311 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 0) - type: boolean + predicate: (key = 0) (type: boolean) + Statistics: Num rows: 250 Data size: 29655 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _bucketname - type: string - expr: _offset - type: bigint - expr: _bitmaps - type: array + expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 29655 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: bigint - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: array - b:default__src_src2_index__ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 250 Data size: 29655 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: array) TableScan alias: default__src_src2_index__ + Statistics: Num rows: 500 Data size: 61311 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (value = 'val_0') - type: boolean + predicate: (value = 'val_0') (type: boolean) + Statistics: Num rows: 250 Data size: 30655 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _bucketname - type: string - expr: _offset - type: bigint - expr: _bitmaps - type: array + expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 30655 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: bigint - tag: 1 - value expressions: - expr: _col2 - type: array + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 250 Data size: 30655 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: array) Reduce Operator Tree: Join Operator condition map: @@ -217,31 +172,23 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col5 + Statistics: Num rows: 275 Data size: 32620 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (not EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(_col2,_col5))) - type: boolean + predicate: (not EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(_col2,_col5))) (type: boolean) + Statistics: Num rows: 138 Data size: 16369 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 138 Data size: 16369 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: collect_set(_col1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: collect_set(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 138 Data size: 16369 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -249,41 +196,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: array + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 138 Data size: 16369 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: array) Reduce Operator Tree: Group By Operator - aggregations: - expr: collect_set(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: collect_set(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 8184 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: array + expressions: _col0 (type: string), _col1 (type: array) outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 8184 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 69 Data size: 8184 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_bitmap_auto.q.out ql/src/test/results/clientpositive/index_bitmap_auto.q.out index 15a1801..9bee551 100644 --- ql/src/test/results/clientpositive/index_bitmap_auto.q.out +++ ql/src/test/results/clientpositive/index_bitmap_auto.q.out @@ -21,9 +21,6 @@ POSTHOOK: query: -- create indices EXPLAIN CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX -ABSTRACT SYNTAX TREE: - (TOK_CREATEINDEX src1_index 'BITMAP' (TOK_TABNAME src) (TOK_TABCOLNAME key) TOK_DEFERRED_REBUILDINDEX) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -36,9 +33,6 @@ PREHOOK: type: CREATEINDEX POSTHOOK: query: EXPLAIN CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX -ABSTRACT SYNTAX TREE: - (TOK_CREATEINDEX src2_index 'BITMAP' (TOK_TABNAME src) (TOK_TABCOLNAME value) TOK_DEFERRED_REBUILDINDEX) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -149,9 +143,6 @@ POSTHOOK: Lineage: default__src_src2_index__._bitmaps EXPRESSION [(src)src.Field POSTHOOK: Lineage: default__src_src2_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src2_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME default__src_src1_index__))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL _bucketname) bucketname) (TOK_SELEXPR (TOK_TABLE_OR_COL _offset) offset) (TOK_SELEXPR (TOK_TABLE_OR_COL _bitmaps) bitmaps)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 0)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME default__src_src2_index__))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL _bucketname) bucketname) (TOK_SELEXPR (TOK_TABLE_OR_COL _offset) offset) (TOK_SELEXPR (TOK_TABLE_OR_COL _bitmaps) bitmaps)) (TOK_WHERE (= (TOK_TABLE_OR_COL value) "val_0")))) b) (AND (= (. (TOK_TABLE_OR_COL a) bucketname) (. (TOK_TABLE_OR_COL b) bucketname)) (= (. (TOK_TABLE_OR_COL a) offset) (. (TOK_TABLE_OR_COL b) offset))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) bucketname) _bucketname) (TOK_SELEXPR (TOK_FUNCTION COLLECT_SET (. (TOK_TABLE_OR_COL a) offset)) _offsets)) (TOK_WHERE (NOT (TOK_FUNCTION EWAH_BITMAP_EMPTY (TOK_FUNCTION EWAH_BITMAP_AND (. (TOK_TABLE_OR_COL a) bitmaps) (. (TOK_TABLE_OR_COL b) bitmaps))))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) bucketname)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -160,75 +151,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:default__src_src1_index__ + Map Operator Tree: TableScan alias: default__src_src1_index__ + Statistics: Num rows: 500 Data size: 59311 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 0) - type: boolean + predicate: (key = 0) (type: boolean) + Statistics: Num rows: 250 Data size: 29655 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _bucketname - type: string - expr: _offset - type: bigint - expr: _bitmaps - type: array + expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 29655 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: bigint - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: array - b:default__src_src2_index__ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 250 Data size: 29655 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: array) TableScan alias: default__src_src2_index__ + Statistics: Num rows: 500 Data size: 61311 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (value = 'val_0') - type: boolean + predicate: (value = 'val_0') (type: boolean) + Statistics: Num rows: 250 Data size: 30655 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _bucketname - type: string - expr: _offset - type: bigint - expr: _bitmaps - type: array + expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 30655 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: bigint - tag: 1 - value expressions: - expr: _col2 - type: array + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 250 Data size: 30655 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: array) Reduce Operator Tree: Join Operator condition map: @@ -236,31 +191,23 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col5 + Statistics: Num rows: 275 Data size: 32620 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (not EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(_col2,_col5))) - type: boolean + predicate: (not EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(_col2,_col5))) (type: boolean) + Statistics: Num rows: 138 Data size: 16369 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 138 Data size: 16369 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: collect_set(_col1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: collect_set(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 138 Data size: 16369 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -268,41 +215,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: array + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 138 Data size: 16369 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: array) Reduce Operator Tree: Group By Operator - aggregations: - expr: collect_set(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: collect_set(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 8184 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: array + expressions: _col0 (type: string), _col1 (type: array) outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 8184 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 69 Data size: 8184 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out index 9787a21..3f1f70c 100644 --- ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out +++ ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out @@ -39,9 +39,6 @@ POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr= POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL key) 86) (= (TOK_TABLE_OR_COL ds) '2008-04-09'))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 @@ -51,65 +48,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - tmp_index:ind0:default__srcpart_src_part_index__ + Map Operator Tree: TableScan alias: default__srcpart_src_part_index__ filterExpr: - expr: ((key = 86) and (not EWAH_BITMAP_EMPTY(_bitmaps))) - type: boolean Filter Operator - predicate: - expr: ((key = 86) and (not EWAH_BITMAP_EMPTY(_bitmaps))) - type: boolean + predicate: ((key = 86) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offset - type: bigint + expressions: _bucketname (type: string), _offset (type: bigint) outputColumnNames: _col0, _col1 Group By Operator - aggregations: - expr: collect_set(_col1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: collect_set(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: array + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: array) Reduce Operator Tree: Group By Operator - aggregations: - expr: collect_set(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: collect_set(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: array + expressions: _col0 (type: string), _col1 (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -123,40 +91,29 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart filterExpr: - expr: ((key = 86) and (ds = '2008-04-09')) - type: boolean + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_bitmap_compression.q.out ql/src/test/results/clientpositive/index_bitmap_compression.q.out index ea53369..f36326a 100644 --- ql/src/test/results/clientpositive/index_bitmap_compression.q.out +++ ql/src/test/results/clientpositive/index_bitmap_compression.q.out @@ -25,9 +25,6 @@ POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldS POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL key) 80) (< (TOK_TABLE_OR_COL key) 100))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 @@ -37,65 +34,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - tmp_index:ind0:default__src_src_index__ + Map Operator Tree: TableScan alias: default__src_src_index__ filterExpr: - expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) - type: boolean Filter Operator - predicate: - expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) - type: boolean + predicate: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offset - type: bigint + expressions: _bucketname (type: string), _offset (type: bigint) outputColumnNames: _col0, _col1 Group By Operator - aggregations: - expr: collect_set(_col1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: collect_set(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: array + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: array) Reduce Operator Tree: Group By Operator - aggregations: - expr: collect_set(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: collect_set(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: array + expressions: _col0 (type: string), _col1 (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -109,40 +77,29 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src filterExpr: - expr: ((key > 80) and (key < 100)) - type: boolean + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 80) and (key < 100)) - type: boolean + predicate: ((key > 80) and (key < 100)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_bitmap_rc.q.out ql/src/test/results/clientpositive/index_bitmap_rc.q.out index ef3ee96..9f8657f 100644 --- ql/src/test/results/clientpositive/index_bitmap_rc.q.out +++ ql/src/test/results/clientpositive/index_bitmap_rc.q.out @@ -77,9 +77,6 @@ POSTHOOK: Lineage: srcpart_rc PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(sr POSTHOOK: Lineage: srcpart_rc PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_rc PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_rc PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_CREATEINDEX srcpart_rc_index 'BITMAP' (TOK_TABNAME srcpart_rc) (TOK_TABCOLNAME key) TOK_DEFERRED_REBUILDINDEX) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -317,9 +314,6 @@ POSTHOOK: Lineage: srcpart_rc PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(sr POSTHOOK: Lineage: srcpart_rc PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_rc PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_rc PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_CREATEINDEX srcpart_rc_index 'BITMAP' (TOK_TABNAME srcpart_rc) (TOK_TABCOLNAME key) TOK_DEFERRED_REBUILDINDEX) - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/index_compact.q.out ql/src/test/results/clientpositive/index_compact.q.out index a65fa67..e30fa7f 100644 --- ql/src/test/results/clientpositive/index_compact.q.out +++ ql/src/test/results/clientpositive/index_compact.q.out @@ -8,9 +8,6 @@ PREHOOK: type: CREATEINDEX POSTHOOK: query: EXPLAIN CREATE INDEX srcpart_index_proj ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX -ABSTRACT SYNTAX TREE: - (TOK_CREATEINDEX srcpart_index_proj 'COMPACT' (TOK_TABNAME srcpart) (TOK_TABCOLNAME key) TOK_DEFERRED_REBUILDINDEX) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -151,9 +148,6 @@ POSTHOOK: type: CREATEINDEX POSTHOOK: Lineage: default__srcpart_srcpart_index_proj__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__srcpart_srcpart_index_proj__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__srcpart_srcpart_index_proj__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_CREATEINDEX srcpart_index_proj 'COMPACT' (TOK_TABNAME srcpart) (TOK_TABCOLNAME key) TOK_DEFERRED_REBUILDINDEX) - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/index_compact_1.q.out ql/src/test/results/clientpositive/index_compact_1.q.out index 17ac19d..a92eefa 100644 --- ql/src/test/results/clientpositive/index_compact_1.q.out +++ ql/src/test/results/clientpositive/index_compact_1.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATEINDEX POSTHOOK: query: EXPLAIN CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX -ABSTRACT SYNTAX TREE: - (TOK_CREATEINDEX src_index 'COMPACT' (TOK_TABNAME src) (TOK_TABCOLNAME key) TOK_DEFERRED_REBUILDINDEX) - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/index_compact_2.q.out ql/src/test/results/clientpositive/index_compact_2.q.out index e454964..6cde30d 100644 --- ql/src/test/results/clientpositive/index_compact_2.q.out +++ ql/src/test/results/clientpositive/index_compact_2.q.out @@ -277,9 +277,6 @@ POSTHOOK: Lineage: srcpart_rc PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(sr POSTHOOK: Lineage: srcpart_rc PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_rc PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_rc PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_CREATEINDEX srcpart_rc_index 'COMPACT' (TOK_TABNAME srcpart_rc) (TOK_TABCOLNAME key) TOK_DEFERRED_REBUILDINDEX) - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/index_compression.q.out ql/src/test/results/clientpositive/index_compression.q.out index 8d5c7de..ff2fe96 100644 --- ql/src/test/results/clientpositive/index_compression.q.out +++ ql/src/test/results/clientpositive/index_compression.q.out @@ -23,9 +23,6 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL key) 80) (< (TOK_TABLE_OR_COL key) 100))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-8 depends on stages: Stage-3 , consists of Stage-5, Stage-4, Stage-6 @@ -40,27 +37,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - default__src_src_index__ + Map Operator Tree: TableScan alias: default__src_src_index__ filterExpr: - expr: ((key > 80) and (key < 100)) - type: boolean Filter Operator - predicate: - expr: ((key > 80) and (key < 100)) - type: boolean + predicate: ((key > 80) and (key < 100)) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -83,40 +70,29 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src filterExpr: - expr: ((key > 80) and (key < 100)) - type: boolean + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 80) and (key < 100)) - type: boolean + predicate: ((key > 80) and (key < 100)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -124,12 +100,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -137,12 +111,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_serde.q.out ql/src/test/results/clientpositive/index_serde.q.out index a06c876..0a796b9 100644 --- ql/src/test/results/clientpositive/index_serde.q.out +++ ql/src/test/results/clientpositive/index_serde.q.out @@ -108,9 +108,6 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: default__doctors_doctors_index__._bucketname SIMPLE [(doctors)doctors.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__doctors_doctors_index__._offsets EXPRESSION [(doctors)doctors.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__doctors_doctors_index__.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME doctors))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL number) 6)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL number))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-8 depends on stages: Stage-3 , consists of Stage-5, Stage-4, Stage-6 @@ -125,27 +122,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - default__doctors_doctors_index__ + Map Operator Tree: TableScan alias: default__doctors_doctors_index__ filterExpr: - expr: (number > 6) - type: boolean Filter Operator - predicate: - expr: (number > 6) - type: boolean + predicate: (number > 6) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -168,44 +155,29 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - doctors + Map Operator Tree: TableScan alias: doctors filterExpr: - expr: (number > 6) - type: boolean + Statistics: Num rows: 2 Data size: 521 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (number > 6) - type: boolean + predicate: (number > 6) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: number - type: int - expr: first_name - type: string - expr: last_name - type: string + expressions: number (type: int), first_name (type: string), last_name (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -213,12 +185,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -226,12 +196,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_stale.q.out ql/src/test/results/clientpositive/index_stale.q.out index 1e4dba7..0a73a6a 100644 --- ql/src/test/results/clientpositive/index_stale.q.out +++ ql/src/test/results/clientpositive/index_stale.q.out @@ -69,9 +69,6 @@ POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME temp))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -79,27 +76,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - temp + Map Operator Tree: TableScan alias: temp filterExpr: - expr: (key = 86) - type: boolean + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_stale_partitioned.q.out ql/src/test/results/clientpositive/index_stale_partitioned.q.out index eed3095..4c39086 100644 --- ql/src/test/results/clientpositive/index_stale_partitioned.q.out +++ ql/src/test/results/clientpositive/index_stale_partitioned.q.out @@ -97,9 +97,6 @@ POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME temp))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL key) 86) (= (TOK_TABLE_OR_COL foo) 'bar'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -107,29 +104,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - temp + Map Operator Tree: TableScan alias: temp filterExpr: - expr: ((key = 86) and (foo = 'bar')) - type: boolean + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string - expr: foo - type: string + expressions: key (type: string), val (type: string), foo (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out index 5a5c788..10040b0 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out @@ -619,9 +619,6 @@ POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpar POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT) value)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2008-04-08')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION IF (== (% (TOK_TABLE_OR_COL key) 100) 0) '11' '12'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -635,58 +632,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToString(_col1) - type: string - expr: if(((_col0 % 100) = 0), '11', '12') - type: string + expressions: _col0 (type: string), UDFToString(_col1) (type: string), if(((_col0 % 100) = 0), '11', '12') (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat diff --git ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out index f8d0626..2bae6a8 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out @@ -24,9 +24,6 @@ POSTHOOK: query: -- Test rollup, should not be bucketed or sorted because its mi EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -35,76 +32,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string - expr: '0' - type: string + aggregations: count(1) + keys: key (type: string), value (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -266,9 +227,6 @@ POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.n POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -277,76 +235,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string - expr: '0' - type: string + aggregations: count(1) + keys: key (type: string), value (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -543,9 +465,6 @@ POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.n POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL key)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -554,76 +473,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string - expr: '0' - type: string + aggregations: count(1) + keys: key (type: string), value (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out index c8e816d..6717784 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out @@ -63,9 +63,6 @@ POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:s POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -79,34 +76,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test_table1 + Map Operator Tree: TableScan alias: test_table1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -139,12 +129,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -153,12 +141,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -256,9 +242,6 @@ POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:s POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) value)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) b) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -267,62 +250,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:test_table1 + Map Operator Tree: TableScan alias: test_table1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(_col1) - type: double + key expressions: UDFToDouble(_col1) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(_col1) - type: double - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - b:src + Map-reduce partition columns: UDFToDouble(_col1) (type: double) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(_col1) - type: double + key expressions: UDFToDouble(_col1) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(_col1) - type: double - tag: 1 + Map-reduce partition columns: UDFToDouble(_col1) (type: double) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -330,18 +293,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -456,9 +416,6 @@ POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)tes POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -472,32 +429,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -530,12 +480,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -544,12 +492,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -653,9 +599,6 @@ POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)a.F POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) value)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -664,68 +607,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col5 - type: string + expressions: _col5 (type: string) outputColumnNames: _col5 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col5 - type: string + aggregations: count() + keys: _col5 (type: string) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out index ed5c393..7c1ad28 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out @@ -45,9 +45,6 @@ FROM srcpart WHERE ds = '2008-04-08') a DISTRIBUTE BY key2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION if (< (% (TOK_TABLE_OR_COL key) 3) 2) 0 1) key2) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (% (TOK_TABLE_OR_COL key) 2) hr)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2008-04-08')))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key2)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL hr)))) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key2)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -56,37 +53,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: if(((key % 3) < 2), 0, 1) - type: int - expr: value - type: string - expr: UDFToInteger((key % 2)) - type: int + expressions: if(((key % 3) < 2), 0, 1) (type: int), value (type: string), UDFToInteger((key % 2)) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/infer_const_type.q.out ql/src/test/results/clientpositive/infer_const_type.q.out index 329228e..052b77e 100644 --- ql/src/test/results/clientpositive/infer_const_type.q.out +++ ql/src/test/results/clientpositive/infer_const_type.q.out @@ -42,9 +42,6 @@ POSTHOOK: query: EXPLAIN SELECT * FROM infertypes WHERE db = '-307' AND str = 1234 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME infertypes))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (AND (AND (AND (AND (AND (= (TOK_TABLE_OR_COL ti) '127') (= (TOK_TABLE_OR_COL si) 32767)) (= (TOK_TABLE_OR_COL i) '12345')) (= (TOK_TABLE_OR_COL bi) '-12345')) (= (TOK_TABLE_OR_COL fl) '0906')) (= (TOK_TABLE_OR_COL db) '-307')) (= (TOK_TABLE_OR_COL str) 1234))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -52,34 +49,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - infertypes + Map Operator Tree: TableScan alias: infertypes + Statistics: Num rows: 0 Data size: 117 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (((((((ti = 127) and (si = 32767)) and (i = 12345)) and (bi = -12345)) and (fl = 906.0)) and (db = -307.0)) and (str = 1234)) - type: boolean + predicate: (((((((ti = 127) and (si = 32767)) and (i = 12345)) and (bi = -12345)) and (fl = 906.0)) and (db = -307.0)) and (str = 1234)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: ti - type: tinyint - expr: si - type: smallint - expr: i - type: int - expr: bi - type: bigint - expr: fl - type: float - expr: db - type: double - expr: str - type: string + expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint), fl (type: float), db (type: double), str (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -134,9 +117,6 @@ EXPLAIN SELECT * FROM infertypes WHERE fl = 'float' OR db = 'double' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME infertypes))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (OR (OR (OR (OR (OR (= (TOK_TABLE_OR_COL ti) '128') (= (TOK_TABLE_OR_COL si) 32768)) (= (TOK_TABLE_OR_COL i) '2147483648')) (= (TOK_TABLE_OR_COL bi) '9223372036854775808')) (= (TOK_TABLE_OR_COL fl) 'float')) (= (TOK_TABLE_OR_COL db) 'double'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -144,34 +124,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - infertypes + Map Operator Tree: TableScan alias: infertypes + Statistics: Num rows: 0 Data size: 117 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (((((false or false) or false) or false) or false) or false) - type: boolean + predicate: (((((false or false) or false) or false) or false) or false) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: ti - type: tinyint - expr: si - type: smallint - expr: i - type: int - expr: bi - type: bigint - expr: fl - type: float - expr: db - type: double - expr: str - type: string + expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint), fl (type: float), db (type: double), str (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -213,9 +179,6 @@ EXPLAIN SELECT * FROM infertypes WHERE si = 327.0 OR i = '-100.0' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME infertypes))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (OR (OR (= (TOK_TABLE_OR_COL ti) '127.0') (= (TOK_TABLE_OR_COL si) 327.0)) (= (TOK_TABLE_OR_COL i) '-100.0'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -223,34 +186,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - infertypes + Map Operator Tree: TableScan alias: infertypes + Statistics: Num rows: 0 Data size: 117 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((false or false) or false) - type: boolean + predicate: ((false or false) or false) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: ti - type: tinyint - expr: si - type: smallint - expr: i - type: int - expr: bi - type: bigint - expr: fl - type: float - expr: db - type: double - expr: str - type: string + expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint), fl (type: float), db (type: double), str (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -284,9 +233,6 @@ POSTHOOK: query: EXPLAIN SELECT * FROM infertypes WHERE i > '100.0' AND str = 1.57 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME infertypes))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (AND (< (TOK_TABLE_OR_COL ti) '127.0') (> (TOK_TABLE_OR_COL i) '100.0')) (= (TOK_TABLE_OR_COL str) 1.57))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -294,34 +240,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - infertypes + Map Operator Tree: TableScan alias: infertypes + Statistics: Num rows: 0 Data size: 117 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (((ti < 127.0) and (i > 100.0)) and (str = 1.57)) - type: boolean + predicate: (((ti < 127.0) and (i > 100.0)) and (str = 1.57)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: ti - type: tinyint - expr: si - type: smallint - expr: i - type: int - expr: bi - type: bigint - expr: fl - type: float - expr: db - type: double - expr: str - type: string + expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint), fl (type: float), db (type: double), str (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/innerjoin.q.out ql/src/test/results/clientpositive/innerjoin.q.out index a5b2769..3cd4a15 100644 --- ql/src/test/results/clientpositive/innerjoin.q.out +++ ql/src/test/results/clientpositive/innerjoin.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src src1 INNER JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value ORDER BY src1.key, src2.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) src1) (TOK_TABREF (TOK_TABNAME src) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -23,37 +20,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1 + Map Operator Tree: TableScan - alias: src1 + alias: src2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - src2 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: src2 + alias: src1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -61,18 +46,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col5 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -80,34 +61,23 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1221,9 +1191,6 @@ on inner.key=src.key POSTHOOK: type: QUERY POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) inner) (TOK_TABREF (TOK_TABNAME src)) (= (. (TOK_TABLE_OR_COL inner) key) (. (TOK_TABLE_OR_COL src) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1231,48 +1198,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - inner:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - src - TableScan - alias: src - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1280,22 +1228,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input.q.out ql/src/test/results/clientpositive/input.q.out index 8ec8714..b01c8ca 100644 --- ql/src/test/results/clientpositive/input.q.out +++ ql/src/test/results/clientpositive/input.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.* FROM SRC x POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -17,13 +14,11 @@ STAGE PLANS: Processor Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT x.* FROM SRC x diff --git ql/src/test/results/clientpositive/input0.q.out ql/src/test/results/clientpositive/input0.q.out index 4672fbf..0ddceb7 100644 --- ql/src/test/results/clientpositive/input0.q.out +++ ql/src/test/results/clientpositive/input0.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -17,13 +14,11 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT * FROM src diff --git ql/src/test/results/clientpositive/input1.q.out ql/src/test/results/clientpositive/input1.q.out index 2e51cb4..adeeaf8 100644 --- ql/src/test/results/clientpositive/input1.q.out +++ ql/src/test/results/clientpositive/input1.q.out @@ -9,9 +9,6 @@ PREHOOK: type: DESCTABLE POSTHOOK: query: EXPLAIN DESCRIBE TEST1 POSTHOOK: type: DESCTABLE -ABSTRACT SYNTAX TREE: - (TOK_DESCTABLE (TOK_TABTYPE TEST1)) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage diff --git ql/src/test/results/clientpositive/input10.q.out ql/src/test/results/clientpositive/input10.q.out index eb971f7..4a2b785 100644 --- ql/src/test/results/clientpositive/input10.q.out +++ ql/src/test/results/clientpositive/input10.q.out @@ -9,9 +9,6 @@ PREHOOK: type: DESCTABLE POSTHOOK: query: EXPLAIN DESCRIBE TEST10 POSTHOOK: type: DESCTABLE -ABSTRACT SYNTAX TREE: - (TOK_DESCTABLE (TOK_TABTYPE TEST10)) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage diff --git ql/src/test/results/clientpositive/input11.q.out ql/src/test/results/clientpositive/input11.q.out index 0211281..f65891c 100644 --- ql/src/test/results/clientpositive/input11.q.out +++ ql/src/test/results/clientpositive/input11.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL src) key) 100)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -27,24 +24,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string + expressions: UDFToInteger(key) (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -75,12 +68,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -89,12 +80,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input11_limit.q.out ql/src/test/results/clientpositive/input11_limit.q.out index 1972862..6970a07 100644 --- ql/src/test/results/clientpositive/input11_limit.q.out +++ ql/src/test/results/clientpositive/input11_limit.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 100 LIMIT 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL src) key) 100)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,43 +19,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input12_hadoop20.q.out ql/src/test/results/clientpositive/input12_hadoop20.q.out index e4cf2b0..06c5d55 100644 --- ql/src/test/results/clientpositive/input12_hadoop20.q.out +++ ql/src/test/results/clientpositive/input12_hadoop20.q.out @@ -29,9 +29,6 @@ INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL src) key) 100))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_WHERE (and (>= (. (TOK_TABLE_OR_COL src) key) 100) (< (. (TOK_TABLE_OR_COL src) key) 200)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest3) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr '12')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key))) (TOK_WHERE (>= (. (TOK_TABLE_OR_COL src) key) 200)))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 @@ -59,60 +56,50 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string + expressions: UDFToInteger(key) (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Filter Operator - predicate: - expr: ((key >= 100) and (key < 200)) - type: boolean + predicate: ((key >= 100) and (key < 200)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string + expressions: UDFToInteger(key) (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 Filter Operator - predicate: - expr: (key >= 200) - type: boolean + predicate: (key >= 200) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int + expressions: UDFToInteger(key) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -143,12 +130,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -157,12 +142,10 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -199,12 +182,10 @@ STAGE PLANS: Stage: Stage-11 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -213,12 +194,10 @@ STAGE PLANS: Stage: Stage-13 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -258,12 +237,10 @@ STAGE PLANS: Stage: Stage-17 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -272,12 +249,10 @@ STAGE PLANS: Stage: Stage-19 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input13.q.out ql/src/test/results/clientpositive/input13.q.out index e20dc22..4824819 100644 --- ql/src/test/results/clientpositive/input13.q.out +++ ql/src/test/results/clientpositive/input13.q.out @@ -27,9 +27,6 @@ INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200 and src.key < 300 INSERT OVERWRITE DIRECTORY 'target/warehouse/dest4.out' SELECT src.value WHERE src.key >= 300 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL src) key) 100))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_WHERE (and (>= (. (TOK_TABLE_OR_COL src) key) 100) (< (. (TOK_TABLE_OR_COL src) key) 200)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest3) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr '12')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key))) (TOK_WHERE (and (>= (. (TOK_TABLE_OR_COL src) key) 200) (< (. (TOK_TABLE_OR_COL src) key) 300)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR 'target/warehouse/dest4.out')) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_WHERE (>= (. (TOK_TABLE_OR_COL src) key) 300)))) - STAGE DEPENDENCIES: Stage-4 is a root stage Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 @@ -63,77 +60,65 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string + expressions: UDFToInteger(key) (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Filter Operator - predicate: - expr: ((key >= 100) and (key < 200)) - type: boolean + predicate: ((key >= 100) and (key < 200)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string + expressions: UDFToInteger(key) (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 Filter Operator - predicate: - expr: ((key >= 200) and (key < 300)) - type: boolean + predicate: ((key >= 200) and (key < 300)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int + expressions: UDFToInteger(key) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 Filter Operator - predicate: - expr: (key >= 300) - type: boolean + predicate: (key >= 300) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 4 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -163,12 +148,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -177,12 +160,10 @@ STAGE PLANS: Stage: Stage-8 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -219,12 +200,10 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -233,12 +212,10 @@ STAGE PLANS: Stage: Stage-14 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -278,12 +255,10 @@ STAGE PLANS: Stage: Stage-18 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -292,12 +267,10 @@ STAGE PLANS: Stage: Stage-20 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -327,12 +300,10 @@ STAGE PLANS: Stage: Stage-23 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -340,12 +311,10 @@ STAGE PLANS: Stage: Stage-25 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input14.q.out ql/src/test/results/clientpositive/input14.q.out index dfe3459..96f937c 100644 --- ql/src/test/results/clientpositive/input14.q.out +++ ql/src/test/results/clientpositive/input14.q.out @@ -21,9 +21,6 @@ FROM ( ) tmap INSERT OVERWRITE TABLE dest1 SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST tkey tvalue)))) (TOK_CLUSTERBY (TOK_TABLE_OR_COL tkey)))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) tkey)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) tvalue))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL tmap) tkey) 100)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -32,53 +29,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 100) - type: boolean + predicate: (_col0 < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input14_limit.q.out ql/src/test/results/clientpositive/input14_limit.q.out index 89403d4..59435ad 100644 --- ql/src/test/results/clientpositive/input14_limit.q.out +++ ql/src/test/results/clientpositive/input14_limit.q.out @@ -21,9 +21,6 @@ FROM ( ) tmap INSERT OVERWRITE TABLE dest1 SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST tkey tvalue)))) (TOK_CLUSTERBY (TOK_TABLE_OR_COL tkey)) (TOK_LIMIT 20))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) tkey)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) tvalue))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL tmap) tkey) 100)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -33,43 +30,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -77,40 +66,30 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 100) - type: boolean + predicate: (_col0 < 100) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input15.q.out ql/src/test/results/clientpositive/input15.q.out index 40aac9c..ce129f7 100644 --- ql/src/test/results/clientpositive/input15.q.out +++ ql/src/test/results/clientpositive/input15.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: EXPLAIN CREATE TABLE TEST15(key INT, value STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE POSTHOOK: type: CREATETABLE -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME TEST15) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL key TOK_INT) (TOK_TABCOL value TOK_STRING)) (TOK_TABLEROWFORMAT (TOK_SERDEPROPS (TOK_TABLEROWFORMATFIELD '\t'))) TOK_TBLTEXTFILE) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -16,12 +13,9 @@ STAGE PLANS: Create Table columns: key int, value string field delimiter: - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: TEST15 - isExternal: false PREHOOK: query: CREATE TABLE TEST15(key INT, value STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE PREHOOK: type: CREATETABLE diff --git ql/src/test/results/clientpositive/input17.q.out ql/src/test/results/clientpositive/input17.q.out index c33bf46..30f989e 100644 --- ql/src/test/results/clientpositive/input17.q.out +++ ql/src/test/results/clientpositive/input17.q.out @@ -21,9 +21,6 @@ FROM ( ) tmap INSERT OVERWRITE TABLE dest1 SELECT tmap.tkey, tmap.tvalue POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (+ (. (TOK_TABLE_OR_COL src_thrift) aint) ([ (. (TOK_TABLE_OR_COL src_thrift) lint) 0)) ([ (. (TOK_TABLE_OR_COL src_thrift) lintstring) 0)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST tkey tvalue)))) (TOK_CLUSTERBY (TOK_TABLE_OR_COL tkey)))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) tkey)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) tvalue))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -32,49 +29,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src_thrift + Map Operator Tree: TableScan alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: (aint + lint[0]) - type: int - expr: lintstring[0] - type: struct + expressions: (aint + lint[0]) (type: int), lintstring[0] (type: struct) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input18.q.out ql/src/test/results/clientpositive/input18.q.out index 06c41b3..6125a0c 100644 --- ql/src/test/results/clientpositive/input18.q.out +++ ql/src/test/results/clientpositive/input18.q.out @@ -21,9 +21,6 @@ FROM ( ) tmap INSERT OVERWRITE TABLE dest1 SELECT tmap.key, regexp_replace(tmap.value,'\t','+') WHERE tmap.key < 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value) (+ 1 2) (+ 3 4)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER))) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) key)) (TOK_SELEXPR (TOK_FUNCTION regexp_replace (. (TOK_TABLE_OR_COL tmap) value) '\t' '+'))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL tmap) key) 100)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -32,57 +29,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: (1 + 2) - type: int - expr: (3 + 4) - type: int + expressions: key (type: string), value (type: string), (1 + 2) (type: int), (3 + 4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 100) - type: boolean + predicate: (_col0 < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: regexp_replace(_col1, ' ', '+') - type: string + expressions: UDFToInteger(_col0) (type: int), regexp_replace(_col1, ' ', '+') (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input1_limit.q.out ql/src/test/results/clientpositive/input1_limit.q.out index 755d6ab..5ec6d83 100644 --- ql/src/test/results/clientpositive/input1_limit.q.out +++ ql/src/test/results/clientpositive/input1_limit.q.out @@ -18,9 +18,6 @@ FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 100 LIMIT 10 INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key < 100 LIMIT 5 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL src) key) 100)) (TOK_LIMIT 10)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL src) key) 100)) (TOK_LIMIT 5))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -32,62 +29,53 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Extract + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -109,30 +97,25 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input2.q.out ql/src/test/results/clientpositive/input2.q.out index 1a33d5b..933665f 100644 --- ql/src/test/results/clientpositive/input2.q.out +++ ql/src/test/results/clientpositive/input2.q.out @@ -78,9 +78,6 @@ PREHOOK: type: SHOWTABLES POSTHOOK: query: EXPLAIN SHOW TABLES POSTHOOK: type: SHOWTABLES -ABSTRACT SYNTAX TREE: - TOK_SHOWTABLES - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage diff --git ql/src/test/results/clientpositive/input20.q.out ql/src/test/results/clientpositive/input20.q.out index 247996c..80c32fd 100644 --- ql/src/test/results/clientpositive/input20.q.out +++ ql/src/test/results/clientpositive/input20.q.out @@ -29,9 +29,6 @@ REDUCE tmap.key, tmap.value USING 'python input20_script.py' AS key, value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) key)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER))) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL tmap) key) (. (TOK_TABLE_OR_COL tmap) value)) TOK_SERDE TOK_RECORDWRITER 'python input20_script.py' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST key value)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -40,64 +37,48 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: key - type: string + expressions: key (type: string), key (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: python input20_script.py output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input21.q.out ql/src/test/results/clientpositive/input21.q.out index 1469873..9715818 100644 --- ql/src/test/results/clientpositive/input21.q.out +++ ql/src/test/results/clientpositive/input21.q.out @@ -13,9 +13,6 @@ PREHOOK: query: EXPLAIN SELECT * FROM src_null DISTRIBUTE BY c SORT BY d PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM src_null DISTRIBUTE BY c SORT BY d POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_null))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL c)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL d))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -23,44 +20,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_null + Map Operator Tree: TableScan alias: src_null + Statistics: Num rows: 0 Data size: 130 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: a - type: string - expr: b - type: string - expr: c - type: string - expr: d - type: string + expressions: a (type: string), b (type: string), c (type: string), d (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 130 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col3 - type: string + key expressions: _col3 (type: string) sort order: + - Map-reduce partition columns: - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 0 Data size: 130 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 130 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 130 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input22.q.out ql/src/test/results/clientpositive/input22.q.out index 906cc50..ba69e50 100644 --- ql/src/test/results/clientpositive/input22.q.out +++ ql/src/test/results/clientpositive/input22.q.out @@ -21,9 +21,6 @@ FROM (SELECT INPUT4.*, INPUT4.KEY as KEY2 FROM INPUT4) a ORDER BY KEY2 LIMIT 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME INPUT4))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME INPUT4))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT4) KEY) KEY2)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) KEY2))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL KEY2))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -31,30 +28,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:input4 + Map Operator Tree: TableScan alias: input4 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 1000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input23.q.out ql/src/test/results/clientpositive/input23.q.out index 4880f0d..20bdfaf 100644 --- ql/src/test/results/clientpositive/input23.q.out +++ ql/src/test/results/clientpositive/input23.q.out @@ -5,7 +5,56 @@ POSTHOOK: query: explain extended select * from srcpart a join srcpart b where a.ds = '2008-04-08' and a.hr = '11' and b.ds = '2008-04-08' and b.hr = '14' limit 5 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) a) (TOK_TABREF (TOK_TABNAME srcpart) b))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (and (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08') (= (. (TOK_TABLE_OR_COL a) hr) '11')) (= (. (TOK_TABLE_OR_COL b) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL b) hr) '14'))) (TOK_LIMIT 5))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcpart + a + TOK_TABREF + TOK_TABNAME + srcpart + b + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + and + and + = + . + TOK_TABLE_OR_COL + a + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + a + hr + '11' + = + . + TOK_TABLE_OR_COL + b + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + b + hr + '14' + TOK_LIMIT + 5 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -14,54 +63,16 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator sort order: - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string - b - TableScan - alias: b - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: ((ds = '2008-04-08') and (hr = '14')) - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Reduce Output Operator - sort order: - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + value expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -117,41 +128,21 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col6, _col7, _col8, _col9 - Statistics: - numRows: 31 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: - numRows: 31 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Limit - Statistics: - numRows: 5 dataSize: 1030 basicStatsState: COMPLETE colStatsState: NONE + Number of rows: 5 + Statistics: Num rows: 5 Data size: 1030 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 5 dataSize: 1030 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 5 Data size: 1030 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/input24.q.out ql/src/test/results/clientpositive/input24.q.out index a0361d3..c09f4f5 100644 --- ql/src/test/results/clientpositive/input24.q.out +++ ql/src/test/results/clientpositive/input24.q.out @@ -16,9 +16,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(1) from tst x where x.d='2009-01-01' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tst) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) d) '2009-01-01')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -26,38 +23,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input25.q.out ql/src/test/results/clientpositive/input25.q.out index c97a819..657bbb5 100644 --- ql/src/test/results/clientpositive/input25.q.out +++ ql/src/test/results/clientpositive/input25.q.out @@ -31,9 +31,6 @@ select * from ( select * from tst x where x.d='2009-02-02' limit 10 ) subq POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tst) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) d) '2009-01-01')) (TOK_LIMIT 10))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tst) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) d) '2009-02-02')) (TOK_LIMIT 10)))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -43,36 +40,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:subq-subquery1:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: a - type: int - expr: b - type: int - expr: d - type: string + expressions: a (type: int), b (type: int), d (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -80,41 +70,31 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -122,36 +102,29 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:subq-subquery2:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: a - type: int - expr: b - type: int - expr: d - type: string + expressions: a (type: int), b (type: int), d (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/input26.q.out ql/src/test/results/clientpositive/input26.q.out index 852c5dd..da69bf4 100644 --- ql/src/test/results/clientpositive/input26.q.out +++ ql/src/test/results/clientpositive/input26.q.out @@ -12,9 +12,6 @@ select * from ( select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5 )subq POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08') (= (. (TOK_TABLE_OR_COL a) hr) '11'))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 5))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL b) ds) '2008-04-08') (= (. (TOK_TABLE_OR_COL b) hr) '14'))) (TOK_LIMIT 5)))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -24,42 +21,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:subq-subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -67,45 +49,31 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -113,44 +81,14 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:subq-subquery2:b - TableScan - alias: b - Filter Operator - predicate: - expr: ((ds = '2008-04-08') and (hr = '14')) - type: boolean - Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string - outputColumnNames: _col0, _col1, _col2, _col3 - Limit - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Limit + Number of rows: 5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/input2_limit.q.out ql/src/test/results/clientpositive/input2_limit.q.out index 5cd8a96..90b56a6 100644 --- ql/src/test/results/clientpositive/input2_limit.q.out +++ ql/src/test/results/clientpositive/input2_limit.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.* FROM SRC x WHERE x.key < 300 LIMIT 5 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 300)) (TOK_LIMIT 5))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,25 +11,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 300) - type: boolean + predicate: (key < 300) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input3.q.out ql/src/test/results/clientpositive/input3.q.out index 58231b1..69a7d33 100644 --- ql/src/test/results/clientpositive/input3.q.out +++ ql/src/test/results/clientpositive/input3.q.out @@ -42,9 +42,6 @@ PREHOOK: type: ALTERTABLE_ADDCOLS POSTHOOK: query: EXPLAIN ALTER TABLE TEST3b ADD COLUMNS (X DOUBLE) POSTHOOK: type: ALTERTABLE_ADDCOLS -ABSTRACT SYNTAX TREE: - (TOK_ALTERTABLE_ADDCOLS TEST3b (TOK_TABCOLLIST (TOK_TABCOL X TOK_DOUBLE))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -78,9 +75,6 @@ PREHOOK: type: ALTERTABLE_RENAME POSTHOOK: query: EXPLAIN ALTER TABLE TEST3b RENAME TO TEST3c POSTHOOK: type: ALTERTABLE_RENAME -ABSTRACT SYNTAX TREE: - (TOK_ALTERTABLE_RENAME TEST3b TEST3c) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -130,9 +124,6 @@ PREHOOK: type: ALTERTABLE_REPLACECOLS POSTHOOK: query: EXPLAIN ALTER TABLE TEST3c REPLACE COLUMNS (R1 INT, R2 DOUBLE) POSTHOOK: type: ALTERTABLE_REPLACECOLS -ABSTRACT SYNTAX TREE: - (TOK_ALTERTABLE_REPLACECOLS TEST3c (TOK_TABCOLLIST (TOK_TABCOL R1 TOK_INT) (TOK_TABCOL R2 TOK_DOUBLE))) - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/input30.q.out ql/src/test/results/clientpositive/input30.q.out index eb88751..4deb400 100644 --- ql/src/test/results/clientpositive/input30.q.out +++ ql/src/test/results/clientpositive/input30.q.out @@ -16,9 +16,6 @@ POSTHOOK: query: explain insert overwrite table dest30 select count(1) from src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest30))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -27,42 +24,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Filter Operator - predicate: - expr: (((hash(rand(460476415)) & 2147483647) % 32) = 0) - type: boolean + predicate: (((hash(rand(460476415)) & 2147483647) % 32) = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int + expressions: UDFToInteger(_col0) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input31.q.out ql/src/test/results/clientpositive/input31.q.out index 1491a6d..4af7681 100644 --- ql/src/test/results/clientpositive/input31.q.out +++ ql/src/test/results/clientpositive/input31.q.out @@ -16,9 +16,6 @@ POSTHOOK: query: explain insert overwrite table dest31 select count(1) from srcbucket POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcbucket))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest31))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -27,42 +24,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcbucket + Map Operator Tree: TableScan alias: srcbucket + Statistics: Num rows: 2900 Data size: 11603 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((hash(key) & 2147483647) % 2) = 0) - type: boolean + predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) + Statistics: Num rows: 1450 Data size: 5801 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 1450 Data size: 5801 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int + expressions: UDFToInteger(_col0) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input32.q.out ql/src/test/results/clientpositive/input32.q.out index 2140954..1c859f7 100644 --- ql/src/test/results/clientpositive/input32.q.out +++ ql/src/test/results/clientpositive/input32.q.out @@ -16,9 +16,6 @@ POSTHOOK: query: explain insert overwrite table dest32 select count(1) from srcbucket POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcbucket))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest32))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -27,38 +24,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcbucket + Map Operator Tree: TableScan alias: srcbucket + Statistics: Num rows: 0 Data size: 11603 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 11603 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int + expressions: UDFToInteger(_col0) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input33.q.out ql/src/test/results/clientpositive/input33.q.out index 06eacc0..5adc052 100644 --- ql/src/test/results/clientpositive/input33.q.out +++ ql/src/test/results/clientpositive/input33.q.out @@ -29,9 +29,6 @@ REDUCE tmap.key, tmap.value USING 'python input20_script.py' AS (key STRING, value STRING) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) key)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER))) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL tmap) key) (. (TOK_TABLE_OR_COL tmap) value)) TOK_SERDE TOK_RECORDWRITER 'python input20_script.py' TOK_SERDE TOK_RECORDREADER (TOK_TABCOLLIST (TOK_TABCOL key TOK_STRING) (TOK_TABCOL value TOK_STRING))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -40,64 +37,48 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: key - type: string + expressions: key (type: string), key (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: python input20_script.py output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input34.q.out ql/src/test/results/clientpositive/input34.q.out index fe7132e..bfe819c 100644 --- ql/src/test/results/clientpositive/input34.q.out +++ ql/src/test/results/clientpositive/input34.q.out @@ -21,9 +21,6 @@ FROM ( ) tmap INSERT OVERWRITE TABLE dest1 SELECT tkey, tvalue POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value)) (TOK_SERDE (TOK_SERDENAME 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe')) TOK_RECORDWRITER 'cat' (TOK_SERDE (TOK_SERDENAME 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe')) TOK_RECORDREADER (TOK_ALIASLIST tkey tvalue)))))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL tkey)) (TOK_SELEXPR (TOK_TABLE_OR_COL tvalue))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -37,33 +34,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -94,12 +86,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -108,12 +98,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input35.q.out ql/src/test/results/clientpositive/input35.q.out index f1c5199..84dff36 100644 --- ql/src/test/results/clientpositive/input35.q.out +++ ql/src/test/results/clientpositive/input35.q.out @@ -21,9 +21,6 @@ FROM ( ) tmap INSERT OVERWRITE TABLE dest1 SELECT tkey, tvalue POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value)) (TOK_SERDE (TOK_SERDEPROPS (TOK_TABLEROWFORMATFIELD '\002'))) TOK_RECORDWRITER 'cat' (TOK_SERDE (TOK_SERDEPROPS (TOK_TABLEROWFORMATFIELD '\002'))) TOK_RECORDREADER (TOK_ALIASLIST tkey tvalue)))))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL tkey)) (TOK_SELEXPR (TOK_TABLE_OR_COL tvalue))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -37,33 +34,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -94,12 +86,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -108,12 +98,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input36.q.out ql/src/test/results/clientpositive/input36.q.out index 6c15d8e..585eb31 100644 --- ql/src/test/results/clientpositive/input36.q.out +++ ql/src/test/results/clientpositive/input36.q.out @@ -21,9 +21,6 @@ FROM ( ) tmap INSERT OVERWRITE TABLE dest1 SELECT tkey, tvalue POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value)) (TOK_SERDE (TOK_SERDEPROPS (TOK_TABLEROWFORMATFIELD '\002'))) TOK_RECORDWRITER 'cat' (TOK_SERDE (TOK_SERDEPROPS (TOK_TABLEROWFORMATFIELD '\003'))) TOK_RECORDREADER (TOK_ALIASLIST tkey tvalue)))))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL tkey)) (TOK_SELEXPR (TOK_TABLE_OR_COL tvalue))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -37,33 +34,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -94,12 +86,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -108,12 +98,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input38.q.out ql/src/test/results/clientpositive/input38.q.out index a4e2e97..048b800 100644 --- ql/src/test/results/clientpositive/input38.q.out +++ ql/src/test/results/clientpositive/input38.q.out @@ -19,9 +19,6 @@ FROM ( ) tmap INSERT OVERWRITE TABLE dest1 SELECT tmap.key, tmap.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value) (+ 1 2) (+ 3 4)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER))))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -35,37 +32,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: (1 + 2) - type: int - expr: (3 + 4) - type: int + expressions: key (type: string), value (type: string), (1 + 2) (type: int), (3 + 4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -96,12 +84,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -110,12 +96,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input39_hadoop20.q.out ql/src/test/results/clientpositive/input39_hadoop20.q.out index e169495..3ef4ce6 100644 --- ql/src/test/results/clientpositive/input39_hadoop20.q.out +++ ql/src/test/results/clientpositive/input39_hadoop20.q.out @@ -68,9 +68,6 @@ POSTHOOK: Lineage: t1 PARTITION(ds=2).key SIMPLE [(src)src.FieldSchema(name:key, POSTHOOK: Lineage: t1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: t2 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME t1)) (TOK_TABREF (TOK_TABNAME t2)) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL t1) ds) '1') (= (. (TOK_TABLE_OR_COL t2) ds) '1'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -79,39 +76,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan - alias: t1 + alias: t2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((hash(rand(460476415)) & 2147483647) % 32) = 0) - type: boolean + predicate: (((hash(rand(460476415)) & 2147483647) % 32) = 0) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - t2 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan - alias: t2 + alias: t1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((hash(rand(460476415)) & 2147483647) % 32) = 0) - type: boolean + predicate: (((hash(rand(460476415)) & 2147483647) % 32) = 0) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -119,17 +106,16 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -137,30 +123,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input3_limit.q.out ql/src/test/results/clientpositive/input3_limit.q.out index 943a9a7..0401325 100644 --- ql/src/test/results/clientpositive/input3_limit.q.out +++ ql/src/test/results/clientpositive/input3_limit.q.out @@ -26,9 +26,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) T)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME T2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -38,46 +35,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -85,23 +68,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input4.q.out ql/src/test/results/clientpositive/input4.q.out index 461cb76..52d4902 100644 --- ql/src/test/results/clientpositive/input4.q.out +++ ql/src/test/results/clientpositive/input4.q.out @@ -9,9 +9,6 @@ PREHOOK: type: LOAD POSTHOOK: query: EXPLAIN LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE INPUT4 POSTHOOK: type: LOAD -ABSTRACT SYNTAX TREE: - (TOK_LOAD '../../data/files/kv1.txt' (TOK_TAB (TOK_TABNAME INPUT4)) LOCAL) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 depends on stages: Stage-0 @@ -47,7 +44,7 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FORMATTED SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias POSTHOOK: type: QUERY -{"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Vectorized execution:":"false","Map:":{"Vectorized execution:":"false","Split Sample:":{},"Alias -> Map Operator Tree:":{"input4alias":{"TS_0":{"SEL_1":{"FS_2":{"File Output Operator":{"Vectorized execution:":"false","GlobalTableId:":"0","compressed:":"false","table:":{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}}}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1"}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"ROOT STAGE":"TRUE"}},"ABSTRACT SYNTAX TREE":"(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME INPUT4) Input4Alias)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL Input4Alias) VALUE)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL Input4Alias) KEY)))))"} +{"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"input4alias","children":{"Select Operator":{"expressions:":"value (type: string), key (type: string)","outputColumnNames:":["_col0","_col1"],"children":{"File Output Operator":{"Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE","compressed:":"false","table:":{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}},"Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE"}}]}},"Stage-0":{"Fetch Operator":{"limit:":"-1"}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"ROOT STAGE":"TRUE"}}} PREHOOK: query: SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias PREHOOK: type: QUERY PREHOOK: Input: default@input4 diff --git ql/src/test/results/clientpositive/input42.q.out ql/src/test/results/clientpositive/input42.q.out index bdea323..4499205 100644 --- ql/src/test/results/clientpositive/input42.q.out +++ ql/src/test/results/clientpositive/input42.q.out @@ -5,7 +5,39 @@ POSTHOOK: query: explain extended select * from srcpart a where a.ds='2008-04-08' order by a.key, a.hr POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08')) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) hr))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + a + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + . + TOK_TABLE_OR_COL + a + ds + '2008-04-08' + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + a + key + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + a + hr + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -14,45 +46,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col3 (type: string) sort order: ++ - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -146,15 +154,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1194,7 +1200,44 @@ POSTHOOK: query: explain extended select * from srcpart a where a.ds='2008-04-08' and key < 200 order by a.key, a.hr POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08') (< (TOK_TABLE_OR_COL key) 200))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) hr))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + a + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + = + . + TOK_TABLE_OR_COL + a + ds + '2008-04-08' + < + TOK_TABLE_OR_COL + key + 200 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + a + key + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + a + hr + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1203,52 +1246,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 200) - type: boolean - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 200) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col3 (type: string) sort order: ++ - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1342,15 +1358,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1768,7 +1782,45 @@ POSTHOOK: query: explain extended select * from srcpart a where a.ds='2008-04-08' and rand(100) < 0.1 order by a.key, a.hr POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08') (< (TOK_FUNCTION rand 100) 0.1))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) hr))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + a + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + = + . + TOK_TABLE_OR_COL + a + ds + '2008-04-08' + < + TOK_FUNCTION + rand + 100 + 0.1 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + a + key + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + a + hr + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1777,52 +1829,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (rand(100) < 0.1) - type: boolean - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + predicate: (rand(100) < 0.1) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col3 (type: string) sort order: ++ - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1916,15 +1941,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/input4_limit.q.out ql/src/test/results/clientpositive/input4_limit.q.out index 1415f6b..406decc 100644 --- ql/src/test/results/clientpositive/input4_limit.q.out +++ ql/src/test/results/clientpositive/input4_limit.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from src sort by key limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -15,34 +12,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -50,26 +40,22 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input5.q.out ql/src/test/results/clientpositive/input5.q.out index 3bced40..057354f 100644 --- ql/src/test/results/clientpositive/input5.q.out +++ ql/src/test/results/clientpositive/input5.q.out @@ -21,9 +21,6 @@ FROM ( ) tmap INSERT OVERWRITE TABLE dest1 SELECT tmap.tkey, tmap.tvalue POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src_thrift) lint) (. (TOK_TABLE_OR_COL src_thrift) lintstring)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST tkey tvalue)))) (TOK_CLUSTERBY (TOK_TABLE_OR_COL tkey)))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) tkey)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) tvalue))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -32,49 +29,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src_thrift + Map Operator Tree: TableScan alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: lint - type: array - expr: lintstring - type: array> + expressions: lint (type: array), lintstring (type: array>) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input6.q.out ql/src/test/results/clientpositive/input6.q.out index da6d674..c02622c 100644 --- ql/src/test/results/clientpositive/input6.q.out +++ ql/src/test/results/clientpositive/input6.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT src1.key, src1.value WHERE src1.key is null POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) value))) (TOK_WHERE (TOK_FUNCTION TOK_ISNULL (. (TOK_TABLE_OR_COL src1) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -27,24 +24,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1 + Map Operator Tree: TableScan alias: src1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: key is null - type: boolean + predicate: key is null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -75,12 +68,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -89,12 +80,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input7.q.out ql/src/test/results/clientpositive/input7.q.out index c6714fd..60f212c 100644 --- ql/src/test/results/clientpositive/input7.q.out +++ ql/src/test/results/clientpositive/input7.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT NULL, src1.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR TOK_NULL) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -27,20 +24,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1 + Map Operator Tree: TableScan alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToDouble(null) - type: double - expr: UDFToInteger(key) - type: int + expressions: UDFToDouble(null) (type: double), UDFToInteger(key) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -71,12 +65,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -85,12 +77,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input8.q.out ql/src/test/results/clientpositive/input8.q.out index 5d760a6..338d605 100644 --- ql/src/test/results/clientpositive/input8.q.out +++ ql/src/test/results/clientpositive/input8.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT 4 + NULL, src1.key - NULL, NULL + NULL POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (+ 4 TOK_NULL)) (TOK_SELEXPR (- (. (TOK_TABLE_OR_COL src1) key) TOK_NULL)) (TOK_SELEXPR (+ TOK_NULL TOK_NULL))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -27,22 +24,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1 + Map Operator Tree: TableScan alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (4 + null) - type: double - expr: UDFToInteger((key - null)) - type: int - expr: (null + null) - type: double + expressions: (4 + null) (type: double), UDFToInteger((key - null)) (type: int), (null + null) (type: double) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -73,12 +65,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -87,12 +77,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input9.q.out ql/src/test/results/clientpositive/input9.q.out index 4cd83eb..c6b51b7 100644 --- ql/src/test/results/clientpositive/input9.q.out +++ ql/src/test/results/clientpositive/input9.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT NULL, src1.key where NULL = NULL POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR TOK_NULL) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key))) (TOK_WHERE (= TOK_NULL TOK_NULL)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -27,24 +24,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1 + Map Operator Tree: TableScan alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (null = null) - type: boolean + predicate: (null = null) (type: boolean) + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: null - type: string - expr: UDFToInteger(key) - type: int + expressions: null (type: string), UDFToInteger(key) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -75,12 +68,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -89,12 +80,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input_columnarserde.q.out ql/src/test/results/clientpositive/input_columnarserde.q.out index e25c518..7e6c9b5 100644 --- ql/src/test/results/clientpositive/input_columnarserde.q.out +++ ql/src/test/results/clientpositive/input_columnarserde.q.out @@ -21,9 +21,6 @@ POSTHOOK: query: EXPLAIN FROM src_thrift INSERT OVERWRITE TABLE input_columnarserde SELECT src_thrift.lint, src_thrift.lstring, src_thrift.mstringstring, src_thrift.aint, src_thrift.astring DISTRIBUTE BY 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME input_columnarserde))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) lint)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) lstring)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) mstringstring)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) aint)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) astring))) (TOK_DISTRIBUTEBY 1))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -32,45 +29,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_thrift + Map Operator Tree: TableScan alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: lint - type: array - expr: lstring - type: array - expr: mstringstring - type: map - expr: aint - type: int - expr: astring - type: string + expressions: lint (type: array), lstring (type: array), mstringstring (type: map), aint (type: int), astring (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: 1 - type: int - tag: -1 - value expressions: - expr: _col0 - type: array - expr: _col1 - type: array - expr: _col2 - type: map - expr: _col3 - type: int - expr: _col4 - type: string + Map-reduce partition columns: 1 (type: int) + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: array), _col1 (type: array), _col2 (type: map), _col3 (type: int), _col4 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat diff --git ql/src/test/results/clientpositive/input_dynamicserde.q.out ql/src/test/results/clientpositive/input_dynamicserde.q.out index 38ac201..65dc58a 100644 --- ql/src/test/results/clientpositive/input_dynamicserde.q.out +++ ql/src/test/results/clientpositive/input_dynamicserde.q.out @@ -23,9 +23,6 @@ POSTHOOK: query: EXPLAIN FROM src_thrift INSERT OVERWRITE TABLE dest1 SELECT src_thrift.lint, src_thrift.lstring, src_thrift.mstringstring, src_thrift.aint, src_thrift.astring POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) lint)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) lstring)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) mstringstring)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) aint)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) astring))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -39,26 +36,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_thrift + Map Operator Tree: TableScan alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: lint - type: array - expr: lstring - type: array - expr: mstringstring - type: map - expr: aint - type: int - expr: astring - type: string + expressions: lint (type: array), lstring (type: array), mstringstring (type: map), aint (type: int), astring (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -89,12 +77,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -103,12 +89,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input_lazyserde.q.out ql/src/test/results/clientpositive/input_lazyserde.q.out index ec85023..066374a 100644 --- ql/src/test/results/clientpositive/input_lazyserde.q.out +++ ql/src/test/results/clientpositive/input_lazyserde.q.out @@ -23,9 +23,6 @@ POSTHOOK: query: EXPLAIN FROM src_thrift INSERT OVERWRITE TABLE dest1 SELECT src_thrift.lint, src_thrift.lstring, src_thrift.mstringstring, src_thrift.aint, src_thrift.astring DISTRIBUTE BY 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) lint)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) lstring)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) mstringstring)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) aint)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src_thrift) astring))) (TOK_DISTRIBUTEBY 1))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -34,45 +31,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_thrift + Map Operator Tree: TableScan alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: lint - type: array - expr: lstring - type: array - expr: mstringstring - type: map - expr: aint - type: int - expr: astring - type: string + expressions: lint (type: array), lstring (type: array), mstringstring (type: map), aint (type: int), astring (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: 1 - type: int - tag: -1 - value expressions: - expr: _col0 - type: array - expr: _col1 - type: array - expr: _col2 - type: map - expr: _col3 - type: int - expr: _col4 - type: string + Map-reduce partition columns: 1 (type: int) + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: array), _col1 (type: array), _col2 (type: map), _col3 (type: int), _col4 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input_limit.q.out ql/src/test/results/clientpositive/input_limit.q.out index b9971f2..6438e3d 100644 --- ql/src/test/results/clientpositive/input_limit.q.out +++ ql/src/test/results/clientpositive/input_limit.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.* FROM SRC x LIMIT 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -17,14 +14,14 @@ STAGE PLANS: Processor Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT x.* FROM SRC x LIMIT 20 diff --git ql/src/test/results/clientpositive/input_part0.q.out ql/src/test/results/clientpositive/input_part0.q.out index b060134..7a1d73e 100644 --- ql/src/test/results/clientpositive/input_part0.q.out +++ ql/src/test/results/clientpositive/input_part0.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.* FROM SRCPART x WHERE x.ds = '2008-04-08' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRCPART) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) ds) '2008-04-08')))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -17,17 +14,11 @@ STAGE PLANS: Processor Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT x.* FROM SRCPART x WHERE x.ds = '2008-04-08' diff --git ql/src/test/results/clientpositive/input_part1.q.out ql/src/test/results/clientpositive/input_part1.q.out index b39e5f7..4c1d89b 100644 --- ql/src/test/results/clientpositive/input_part1.q.out +++ ql/src/test/results/clientpositive/input_part1.q.out @@ -12,7 +12,60 @@ FROM srcpart INSERT OVERWRITE TABLE dest1 SELECT srcpart.key, srcpart.value, srcpart.hr, srcpart.ds WHERE srcpart.key < 100 and srcpart.ds = '2008-04-08' and srcpart.hr = '12' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) hr)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) ds))) (TOK_WHERE (and (and (< (. (TOK_TABLE_OR_COL srcpart) key) 100) (= (. (TOK_TABLE_OR_COL srcpart) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL srcpart) hr) '12'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest1 + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + srcpart + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + srcpart + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + srcpart + hr + TOK_SELEXPR + . + TOK_TABLE_OR_COL + srcpart + ds + TOK_WHERE + and + and + < + . + TOK_TABLE_OR_COL + srcpart + key + 100 + = + . + TOK_TABLE_OR_COL + srcpart + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + srcpart + hr + '12' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -27,40 +80,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 100) - type: boolean - Statistics: - numRows: 9 dataSize: 1803 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string - expr: hr - type: string - expr: ds - type: string + expressions: UDFToInteger(key) (type: int), value (type: string), hr (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 9 dataSize: 1803 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 9 dataSize: 1803 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -164,8 +202,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -231,8 +268,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/input_part10.q.out ql/src/test/results/clientpositive/input_part10.q.out index 902f821..4cbacca 100644 --- ql/src/test/results/clientpositive/input_part10.q.out +++ ql/src/test/results/clientpositive/input_part10.q.out @@ -29,9 +29,6 @@ POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455') SELECT 1, 2 FROM src LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME part_special) (TOK_PARTSPEC (TOK_PARTVAL ds '2008 04 08') (TOK_PARTVAL ts '10:11:12=455')))) (TOK_SELECT (TOK_SELEXPR 1) (TOK_SELEXPR 2)) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -40,32 +37,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: 1 - type: int - expr: 2 - type: int + expressions: 1 (type: int), 2 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input_part2.q.out ql/src/test/results/clientpositive/input_part2.q.out index 08962cd..ea22329 100644 --- ql/src/test/results/clientpositive/input_part2.q.out +++ ql/src/test/results/clientpositive/input_part2.q.out @@ -19,7 +19,107 @@ INSERT OVERWRITE TABLE dest1 SELECT srcpart.key, srcpart.value, srcpart.hr, srcp INSERT OVERWRITE TABLE dest2 SELECT srcpart.key, srcpart.value, srcpart.hr, srcpart.ds WHERE srcpart.key < 100 and srcpart.ds = '2008-04-09' and srcpart.hr = '12' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) hr)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) ds))) (TOK_WHERE (and (and (< (. (TOK_TABLE_OR_COL srcpart) key) 100) (= (. (TOK_TABLE_OR_COL srcpart) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL srcpart) hr) '12')))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) hr)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) ds))) (TOK_WHERE (and (and (< (. (TOK_TABLE_OR_COL srcpart) key) 100) (= (. (TOK_TABLE_OR_COL srcpart) ds) '2008-04-09')) (= (. (TOK_TABLE_OR_COL srcpart) hr) '12'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest1 + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + srcpart + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + srcpart + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + srcpart + hr + TOK_SELEXPR + . + TOK_TABLE_OR_COL + srcpart + ds + TOK_WHERE + and + and + < + . + TOK_TABLE_OR_COL + srcpart + key + 100 + = + . + TOK_TABLE_OR_COL + srcpart + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + srcpart + hr + '12' + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest2 + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + srcpart + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + srcpart + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + srcpart + hr + TOK_SELEXPR + . + TOK_TABLE_OR_COL + srcpart + ds + TOK_WHERE + and + and + < + . + TOK_TABLE_OR_COL + srcpart + key + 100 + = + . + TOK_TABLE_OR_COL + srcpart + ds + '2008-04-09' + = + . + TOK_TABLE_OR_COL + srcpart + hr + '12' + STAGE DEPENDENCIES: Stage-2 is a root stage @@ -41,40 +141,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key < 100) and (ds = '2008-04-08')) - type: boolean - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key < 100) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string - expr: hr - type: string - expr: ds - type: string + expressions: UDFToInteger(key) (type: int), value (type: string), hr (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -96,31 +181,18 @@ STAGE PLANS: MultiFileSpray: false Filter Operator isSamplingPred: false - predicate: - expr: ((key < 100) and (ds = '2008-04-09')) - type: boolean - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key < 100) and (ds = '2008-04-09')) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string - expr: hr - type: string - expr: ds - type: string + expressions: UDFToInteger(key) (type: int), value (type: string), hr (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 2 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -267,8 +339,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -334,8 +405,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -441,8 +511,7 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -508,8 +577,7 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/input_part3.q.out ql/src/test/results/clientpositive/input_part3.q.out index a05e540..2124763 100644 --- ql/src/test/results/clientpositive/input_part3.q.out +++ ql/src/test/results/clientpositive/input_part3.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.* FROM SRCPART x WHERE x.ds = '2008-04-08' and x.hr = 11 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRCPART) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL x) ds) '2008-04-08') (= (. (TOK_TABLE_OR_COL x) hr) 11))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -17,17 +14,11 @@ STAGE PLANS: Processor Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT x.* FROM SRCPART x WHERE x.ds = '2008-04-08' and x.hr = 11 diff --git ql/src/test/results/clientpositive/input_part4.q.out ql/src/test/results/clientpositive/input_part4.q.out index 6d19c9e..dbc5cf5 100644 --- ql/src/test/results/clientpositive/input_part4.q.out +++ ql/src/test/results/clientpositive/input_part4.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.* FROM SRCPART x WHERE x.ds = '2008-04-08' and x.hr = 15 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRCPART) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL x) ds) '2008-04-08') (= (. (TOK_TABLE_OR_COL x) hr) 15))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -17,21 +14,14 @@ STAGE PLANS: Processor Tree: TableScan alias: x + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: ((ds = '2008-04-08') and (hr = 15)) - type: boolean + predicate: ((ds = '2008-04-08') and (hr = 15)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE ListSink PREHOOK: query: SELECT x.* FROM SRCPART x WHERE x.ds = '2008-04-08' and x.hr = 15 diff --git ql/src/test/results/clientpositive/input_part5.q.out ql/src/test/results/clientpositive/input_part5.q.out index 83356aa..1499c94 100644 --- ql/src/test/results/clientpositive/input_part5.q.out +++ ql/src/test/results/clientpositive/input_part5.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN insert overwrite table tmptable SELECT x.* FROM SRCPART x WHERE x.ds = '2008-04-08' and x.key < 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRCPART) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME tmptable))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL x) ds) '2008-04-08') (< (. (TOK_TABLE_OR_COL x) key) 100))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -27,28 +24,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -79,12 +68,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -93,12 +80,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input_part6.q.out ql/src/test/results/clientpositive/input_part6.q.out index d5eab9d..5c20862 100644 --- ql/src/test/results/clientpositive/input_part6.q.out +++ ql/src/test/results/clientpositive/input_part6.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.* FROM SRCPART x WHERE x.ds = 2008-04-08 LIMIT 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRCPART) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) ds) (- (- 2008 04) 08))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,29 +11,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (ds = ((2008 - 4) - 8)) - type: boolean + predicate: (ds = ((2008 - 4) - 8)) (type: boolean) + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input_part7.q.out ql/src/test/results/clientpositive/input_part7.q.out index e2894e9..2713fe3 100644 --- ql/src/test/results/clientpositive/input_part7.q.out +++ ql/src/test/results/clientpositive/input_part7.q.out @@ -15,7 +15,99 @@ SELECT * FROM ( SORT BY A.key, A.value, A.ds, A.hr POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRCPART) X)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME X)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL X) ds) '2008-04-08') (< (. (TOK_TABLE_OR_COL X) key) 100))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRCPART) Y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME Y)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL Y) ds) '2008-04-08') (< (. (TOK_TABLE_OR_COL Y) key) 100)))))) A)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL A) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL A) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL A) ds)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL A) hr))))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_UNION + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + SRCPART + X + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + X + TOK_WHERE + and + = + . + TOK_TABLE_OR_COL + X + ds + '2008-04-08' + < + . + TOK_TABLE_OR_COL + X + key + 100 + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + SRCPART + Y + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + Y + TOK_WHERE + and + = + . + TOK_TABLE_OR_COL + Y + ds + '2008-04-08' + < + . + TOK_TABLE_OR_COL + Y + key + 100 + A + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_SORTBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + A + key + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + A + value + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + A + ds + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + A + hr + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -24,137 +116,55 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:a-subquery1:x + Map Operator Tree: TableScan alias: x - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 100) - type: boolean - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Union - Statistics: - numRows: 38 dataSize: 7614 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7614 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 38 dataSize: 7614 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7614 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: ++++ - Statistics: - numRows: 38 dataSize: 7614 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7614 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - null-subquery2:a-subquery2:y + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: y - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 100) - type: boolean - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Union - Statistics: - numRows: 38 dataSize: 7614 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7614 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 38 dataSize: 7614 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7614 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: ++++ - Statistics: - numRows: 38 dataSize: 7614 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7614 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -248,15 +258,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 38 dataSize: 7614 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7614 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 38 dataSize: 7614 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7614 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/input_part8.q.out ql/src/test/results/clientpositive/input_part8.q.out index 757aafd..cd66293 100644 --- ql/src/test/results/clientpositive/input_part8.q.out +++ ql/src/test/results/clientpositive/input_part8.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.* FROM SRCPART x WHERE ds = '2008-04-08' LIMIT 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRCPART) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2008-04-08')) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -17,18 +14,14 @@ STAGE PLANS: Processor Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT x.* FROM SRCPART x WHERE ds = '2008-04-08' LIMIT 10 diff --git ql/src/test/results/clientpositive/input_part9.q.out ql/src/test/results/clientpositive/input_part9.q.out index 4056f5f..eb404bd 100644 --- ql/src/test/results/clientpositive/input_part9.q.out +++ ql/src/test/results/clientpositive/input_part9.q.out @@ -5,7 +5,44 @@ POSTHOOK: query: EXPLAIN EXTENDED SELECT x.* FROM SRCPART x WHERE key IS NOT NULL AND ds = '2008-04-08' order by x.key, x.hr POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRCPART) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))) (TOK_WHERE (AND (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL key)) (= (TOK_TABLE_OR_COL ds) '2008-04-08'))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL x) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL x) hr))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + SRCPART + x + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + x + TOK_WHERE + AND + TOK_FUNCTION + TOK_ISNOTNULL + TOK_TABLE_OR_COL + key + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + x + key + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + x + hr + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -14,52 +51,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: key is not null - type: boolean - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col3 (type: string) sort order: ++ - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -153,15 +163,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/input_testsequencefile.q.out ql/src/test/results/clientpositive/input_testsequencefile.q.out index 501df3c..1f16ac8 100644 --- ql/src/test/results/clientpositive/input_testsequencefile.q.out +++ ql/src/test/results/clientpositive/input_testsequencefile.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest4_sequencefile SELECT src.key, src.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest4_sequencefile))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -27,20 +24,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string + expressions: UDFToInteger(key) (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -71,12 +65,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -85,12 +77,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/input_testxpath.q.out ql/src/test/results/clientpositive/input_testxpath.q.out index 0e181c1..8b05375 100644 --- ql/src/test/results/clientpositive/input_testxpath.q.out +++ ql/src/test/results/clientpositive/input_testxpath.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src_thrift INSERT OVERWRITE TABLE dest1 SELECT src_thrift.lint[1], src_thrift.lintstring[0].mystring, src_thrift.mstringstring['key_2'] POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR ([ (. (TOK_TABLE_OR_COL src_thrift) lint) 1)) (TOK_SELEXPR (. ([ (. (TOK_TABLE_OR_COL src_thrift) lintstring) 0) mystring)) (TOK_SELEXPR ([ (. (TOK_TABLE_OR_COL src_thrift) mstringstring) 'key_2'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -27,22 +24,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_thrift + Map Operator Tree: TableScan alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: lint[1] - type: int - expr: lintstring[0].mystring - type: string - expr: mstringstring['key_2'] - type: string + expressions: lint[1] (type: int), lintstring[0].mystring (type: string), mstringstring['key_2'] (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -73,12 +65,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -87,12 +77,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input_testxpath2.q.out ql/src/test/results/clientpositive/input_testxpath2.q.out index 1c1f493..82b6edc 100644 --- ql/src/test/results/clientpositive/input_testxpath2.q.out +++ ql/src/test/results/clientpositive/input_testxpath2.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src_thrift INSERT OVERWRITE TABLE dest1 SELECT size(src_thrift.lint), size(src_thrift.lintstring), size(src_thrift.mstringstring) where src_thrift.lint IS NOT NULL AND NOT (src_thrift.mstringstring IS NULL) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION size (. (TOK_TABLE_OR_COL src_thrift) lint))) (TOK_SELEXPR (TOK_FUNCTION size (. (TOK_TABLE_OR_COL src_thrift) lintstring))) (TOK_SELEXPR (TOK_FUNCTION size (. (TOK_TABLE_OR_COL src_thrift) mstringstring)))) (TOK_WHERE (AND (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL src_thrift) lint)) (NOT (TOK_FUNCTION TOK_ISNULL (. (TOK_TABLE_OR_COL src_thrift) mstringstring))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -27,26 +24,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_thrift + Map Operator Tree: TableScan alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (lint is not null and (not mstringstring is null)) - type: boolean + predicate: (lint is not null and (not mstringstring is null)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: size(lint) - type: int - expr: size(lintstring) - type: int - expr: size(mstringstring) - type: int + expressions: size(lint) (type: int), size(lintstring) (type: int), size(mstringstring) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -77,12 +68,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -91,12 +80,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input_testxpath3.q.out ql/src/test/results/clientpositive/input_testxpath3.q.out index 7b7a9cf..58ac628 100644 --- ql/src/test/results/clientpositive/input_testxpath3.q.out +++ ql/src/test/results/clientpositive/input_testxpath3.q.out @@ -6,9 +6,6 @@ POSTHOOK: query: EXPLAIN FROM src_thrift SELECT src_thrift.mstringstring['key_9'], src_thrift.lintstring.myint POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR ([ (. (TOK_TABLE_OR_COL src_thrift) mstringstring) 'key_9')) (TOK_SELEXPR (. (. (TOK_TABLE_OR_COL src_thrift) lintstring) myint))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -16,20 +13,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_thrift + Map Operator Tree: TableScan alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: mstringstring['key_9'] - type: string - expr: lintstring.myint - type: array + expressions: mstringstring['key_9'] (type: string), lintstring.myint (type: array) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/input_testxpath4.q.out ql/src/test/results/clientpositive/input_testxpath4.q.out index f537fed..6c48ce1 100644 --- ql/src/test/results/clientpositive/input_testxpath4.q.out +++ ql/src/test/results/clientpositive/input_testxpath4.q.out @@ -12,9 +12,6 @@ WHERE src_thrift.mstringstring['key_9'] IS NOT NULL AND lintstring.myint IS NOT NULL AND lintstring IS NOT NULL POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR ([ (. (TOK_TABLE_OR_COL src_thrift) mstringstring) 'key_9')) (TOK_SELEXPR (. (TOK_TABLE_OR_COL lintstring) myint))) (TOK_WHERE (AND (AND (TOK_FUNCTION TOK_ISNOTNULL ([ (. (TOK_TABLE_OR_COL src_thrift) mstringstring) 'key_9')) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL lintstring) myint))) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL lintstring)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -22,24 +19,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_thrift + Map Operator Tree: TableScan alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((mstringstring['key_9'] is not null and lintstring.myint is not null) and lintstring is not null) - type: boolean + predicate: ((mstringstring['key_9'] is not null and lintstring.myint is not null) and lintstring is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: mstringstring['key_9'] - type: string - expr: lintstring.myint - type: array + expressions: mstringstring['key_9'] (type: string), lintstring.myint (type: array) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -89,9 +82,6 @@ WHERE src_thrift.mstringstring['key_9'] IS NOT NULL AND lintstring.myint IS NOT NULL AND lintstring IS NOT NULL POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR ([ (. (TOK_TABLE_OR_COL src_thrift) mstringstring) 'key_9')) (TOK_SELEXPR (. (TOK_TABLE_OR_COL lintstring) myint))) (TOK_WHERE (AND (AND (TOK_FUNCTION TOK_ISNOTNULL ([ (. (TOK_TABLE_OR_COL src_thrift) mstringstring) 'key_9')) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL lintstring) myint))) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL lintstring)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -99,24 +89,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_thrift + Map Operator Tree: TableScan alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((mstringstring['key_9'] is not null and lintstring.myint is not null) and lintstring is not null) - type: boolean + predicate: ((mstringstring['key_9'] is not null and lintstring.myint is not null) and lintstring is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: mstringstring['key_9'] - type: string - expr: lintstring.myint - type: array + expressions: mstringstring['key_9'] (type: string), lintstring.myint (type: array) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/inputddl1.q.out ql/src/test/results/clientpositive/inputddl1.q.out index e55a308..4dee65c 100644 --- ql/src/test/results/clientpositive/inputddl1.q.out +++ ql/src/test/results/clientpositive/inputddl1.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: EXPLAIN CREATE TABLE INPUTDDL1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME INPUTDDL1) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL key TOK_INT) (TOK_TABCOL value TOK_STRING)) TOK_TBLTEXTFILE) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -15,12 +12,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: key int, value string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: INPUTDDL1 - isExternal: false PREHOOK: query: CREATE TABLE INPUTDDL1(key INT, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE diff --git ql/src/test/results/clientpositive/inputddl2.q.out ql/src/test/results/clientpositive/inputddl2.q.out index a5445c6..b20bed8 100644 --- ql/src/test/results/clientpositive/inputddl2.q.out +++ ql/src/test/results/clientpositive/inputddl2.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: EXPLAIN CREATE TABLE INPUTDDL2(key INT, value STRING) PARTITIONED BY(ds STRING, country STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME INPUTDDL2) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL key TOK_INT) (TOK_TABCOL value TOK_STRING)) (TOK_TABLEPARTCOLS (TOK_TABCOLLIST (TOK_TABCOL ds TOK_STRING) (TOK_TABCOL country TOK_STRING))) TOK_TBLTEXTFILE) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -15,13 +12,10 @@ STAGE PLANS: Create Table Operator: Create Table columns: key int, value string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat partition columns: ds string, country string name: INPUTDDL2 - isExternal: false PREHOOK: query: CREATE TABLE INPUTDDL2(key INT, value STRING) PARTITIONED BY(ds STRING, country STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE diff --git ql/src/test/results/clientpositive/inputddl3.q.out ql/src/test/results/clientpositive/inputddl3.q.out index 9d96ebf..afd7d74 100644 --- ql/src/test/results/clientpositive/inputddl3.q.out +++ ql/src/test/results/clientpositive/inputddl3.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: EXPLAIN CREATE TABLE INPUTDDL3(key INT, value STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE POSTHOOK: type: CREATETABLE -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME INPUTDDL3) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL key TOK_INT) (TOK_TABCOL value TOK_STRING)) (TOK_TABLEROWFORMAT (TOK_SERDEPROPS (TOK_TABLEROWFORMATFIELD '\t'))) TOK_TBLTEXTFILE) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -16,12 +13,9 @@ STAGE PLANS: Create Table columns: key int, value string field delimiter: - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: INPUTDDL3 - isExternal: false PREHOOK: query: CREATE TABLE INPUTDDL3(key INT, value STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE PREHOOK: type: CREATETABLE diff --git ql/src/test/results/clientpositive/inputddl6.q.out ql/src/test/results/clientpositive/inputddl6.q.out index 348038c..3ca50e3 100644 --- ql/src/test/results/clientpositive/inputddl6.q.out +++ ql/src/test/results/clientpositive/inputddl6.q.out @@ -76,9 +76,6 @@ PREHOOK: type: DESCTABLE POSTHOOK: query: EXPLAIN DESCRIBE EXTENDED INPUTDDL6 PARTITION (ds='2008-04-09') POSTHOOK: type: DESCTABLE -ABSTRACT SYNTAX TREE: - (TOK_DESCTABLE (TOK_TABTYPE INPUTDDL6 (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-09'))) EXTENDED) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage diff --git ql/src/test/results/clientpositive/insert1.q.out ql/src/test/results/clientpositive/insert1.q.out index 079e166..31ceff2 100644 --- ql/src/test/results/clientpositive/insert1.q.out +++ ql/src/test/results/clientpositive/insert1.q.out @@ -24,9 +24,6 @@ POSTHOOK: query: explain insert into table insert1 select a.key, a.value from in POSTHOOK: type: QUERY POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME insert2) a)) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) key) (- 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -40,24 +37,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (key = (- 1)) - type: boolean + predicate: (key = (- 1)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -88,12 +81,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -102,12 +93,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -126,9 +115,6 @@ POSTHOOK: query: explain insert into table INSERT1 select a.key, a.value from in POSTHOOK: type: QUERY POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME insert2) a)) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME INSERT1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) key) (- 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -142,24 +128,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (key = (- 1)) - type: boolean + predicate: (key = (- 1)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -190,12 +172,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -204,12 +184,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -243,9 +221,6 @@ POSTHOOK: query: explain insert into table x.INSERT1 select a.key, a.value from POSTHOOK: type: QUERY POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME insert2) a)) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME x INSERT1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) key) (- 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -259,24 +234,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (key = (- 1)) - type: boolean + predicate: (key = (- 1)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -307,12 +278,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -321,12 +290,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -345,9 +312,6 @@ POSTHOOK: query: explain insert into table default.INSERT1 select a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME insert2) a)) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME default INSERT1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) key) (- 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -361,24 +325,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (key = (- 1)) - type: boolean + predicate: (key = (- 1)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -409,12 +369,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -423,12 +381,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -453,9 +409,6 @@ insert overwrite table x.insert1 select * where key > 10 and key < 20 POSTHOOK: type: QUERY POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME insert2))) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME x insert1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 @@ -476,43 +429,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - insert2 + Map Operator Tree: TableScan alias: insert2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -543,12 +488,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -557,12 +500,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -599,12 +540,10 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -613,12 +552,10 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out index 0d92ff6..9fd1533 100644 --- ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out +++ ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out @@ -28,9 +28,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE destinTable PARTITION (ds='2011-11-11', hr='11') if not exists SELECT one,two FROM sourceTable WHERE ds='2011-11-11' AND hr='11' order by one desc, two desc limit 5 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME sourceTable))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME destinTable) (TOK_PARTSPEC (TOK_PARTVAL ds '2011-11-11') (TOK_PARTVAL hr '11'))) TOK_IFNOTEXISTS) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL one)) (TOK_SELEXPR (TOK_TABLE_OR_COL two))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2011-11-11') (= (TOK_TABLE_OR_COL hr) '11'))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL one)) (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL two))) (TOK_LIMIT 5))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -39,36 +36,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - sourcetable + Map Operator Tree: TableScan alias: sourcetable + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: one - type: string - expr: two - type: string + expressions: one (type: string), two (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: -- - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -130,9 +119,6 @@ SELECT one,two FROM sourceTable WHERE ds='2011-11-11' AND hr='12' order by one d POSTHOOK: type: QUERY POSTHOOK: Lineage: destintable PARTITION(ds=2011-11-11,hr=11).one SIMPLE [(sourcetable)sourcetable.FieldSchema(name:one, type:string, comment:null), ] POSTHOOK: Lineage: destintable PARTITION(ds=2011-11-11,hr=11).two SIMPLE [(sourcetable)sourcetable.FieldSchema(name:two, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME sourceTable))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME destinTable) (TOK_PARTSPEC (TOK_PARTVAL ds '2011-11-11') (TOK_PARTVAL hr '11'))) TOK_IFNOTEXISTS) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL one)) (TOK_SELEXPR (TOK_TABLE_OR_COL two))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2011-11-11') (= (TOK_TABLE_OR_COL hr) '12'))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL one)) (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL two))) (TOK_LIMIT 5))) - STAGE DEPENDENCIES: STAGE PLANS: @@ -184,9 +170,6 @@ POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE destinTable SELECT one,two FROM POSTHOOK: type: QUERY POSTHOOK: Lineage: destintable PARTITION(ds=2011-11-11,hr=11).one SIMPLE [(sourcetable)sourcetable.FieldSchema(name:one, type:string, comment:null), ] POSTHOOK: Lineage: destintable PARTITION(ds=2011-11-11,hr=11).two SIMPLE [(sourcetable)sourcetable.FieldSchema(name:two, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME sourceTable))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME destinTable))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL one)) (TOK_SELEXPR (TOK_TABLE_OR_COL two))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2011-11-11') (= (TOK_TABLE_OR_COL hr) '11'))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL one)) (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL two))) (TOK_LIMIT 5))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -195,36 +178,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - sourcetable + Map Operator Tree: TableScan alias: sourcetable + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: one - type: string - expr: two - type: string + expressions: one (type: string), two (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: -- - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out index fcc551e..8df39c7 100644 --- ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out +++ ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out @@ -35,9 +35,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE db2.destinTable PARTITION (ds='2011-11-11') SELECT one,two FROM db1.sourceTable WHERE ds='2011-11-11' order by one desc, two desc limit 5 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME db1 sourceTable))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME db2 destinTable) (TOK_PARTSPEC (TOK_PARTVAL ds '2011-11-11')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL one)) (TOK_SELEXPR (TOK_TABLE_OR_COL two))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2011-11-11')) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL one)) (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL two))) (TOK_LIMIT 5))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -46,36 +43,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - sourcetable + Map Operator Tree: TableScan alias: sourcetable + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: one - type: string - expr: two - type: string + expressions: one (type: string), two (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: -- - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -136,9 +125,6 @@ SELECT one,two FROM db1.sourceTable WHERE ds='2011-11-11' order by one desc, two POSTHOOK: type: QUERY POSTHOOK: Lineage: destintable PARTITION(ds=2011-11-11).one SIMPLE [(sourcetable)sourcetable.FieldSchema(name:one, type:string, comment:null), ] POSTHOOK: Lineage: destintable PARTITION(ds=2011-11-11).two SIMPLE [(sourcetable)sourcetable.FieldSchema(name:two, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME db1 sourceTable))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME db2 destinTable) (TOK_PARTSPEC (TOK_PARTVAL ds '2011-11-11')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL one)) (TOK_SELEXPR (TOK_TABLE_OR_COL two))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2011-11-11')) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL one)) (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL two))) (TOK_LIMIT 5))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -147,36 +133,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - sourcetable + Map Operator Tree: TableScan alias: sourcetable + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: one - type: string - expr: two - type: string + expressions: one (type: string), two (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: -- - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/insert_into1.q.out ql/src/test/results/clientpositive/insert_into1.q.out index bbc9d20..2709691 100644 --- ql/src/test/results/clientpositive/insert_into1.q.out +++ ql/src/test/results/clientpositive/insert_into1.q.out @@ -11,9 +11,6 @@ PREHOOK: query: EXPLAIN INSERT INTO TABLE insert_into1 SELECT * from src LIMIT 1 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN INSERT INTO TABLE insert_into1 SELECT * from src LIMIT 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert_into1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 100))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,39 +19,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -105,9 +97,6 @@ POSTHOOK: query: EXPLAIN INSERT INTO TABLE insert_into1 SELECT * FROM src LIMIT POSTHOOK: type: QUERY POSTHOOK: Lineage: insert_into1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: insert_into1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert_into1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 100))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -116,39 +105,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -218,9 +202,6 @@ POSTHOOK: Lineage: insert_into1.key EXPRESSION [(src)src.FieldSchema(name:key, t POSTHOOK: Lineage: insert_into1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: insert_into1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: insert_into1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME insert_into1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -229,39 +210,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/insert_into2.q.out ql/src/test/results/clientpositive/insert_into2.q.out index ba3dcd5..e41fe39 100644 --- ql/src/test/results/clientpositive/insert_into2.q.out +++ ql/src/test/results/clientpositive/insert_into2.q.out @@ -15,9 +15,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN INSERT INTO TABLE insert_into2 PARTITION (ds='1') SELECT * FROM src LIMIT 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert_into2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 100))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -26,39 +23,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -146,9 +138,6 @@ POSTHOOK: Lineage: insert_into2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSc POSTHOOK: Lineage: insert_into2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: insert_into2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: insert_into2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME insert_into2) (TOK_PARTSPEC (TOK_PARTVAL ds '2')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 100))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -157,39 +146,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -262,9 +246,6 @@ POSTHOOK: Lineage: insert_into2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSc POSTHOOK: Lineage: insert_into2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: insert_into2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: insert_into2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME insert_into2) (TOK_PARTSPEC (TOK_PARTVAL ds '2')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 50))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -273,39 +254,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 50 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 50 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/insert_into3.q.out ql/src/test/results/clientpositive/insert_into3.q.out index 5c9a296..c93ff1a 100644 --- ql/src/test/results/clientpositive/insert_into3.q.out +++ ql/src/test/results/clientpositive/insert_into3.q.out @@ -22,9 +22,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src INSERT INTO TABLE insert_into3a SELECT * ORDER BY key, value LIMIT 50 INSERT INTO TABLE insert_into3b SELECT * ORDER BY key, value LIMIT 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert_into3a))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 50)) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert_into3b))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 100))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -36,57 +33,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 50 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -108,35 +90,26 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -216,9 +189,6 @@ POSTHOOK: Lineage: insert_into3a.key EXPRESSION [(src)src.FieldSchema(name:key, POSTHOOK: Lineage: insert_into3a.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: insert_into3b.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: insert_into3b.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME insert_into3a))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10)) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert_into3b))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -230,54 +200,47 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -299,30 +262,25 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/insert_into4.q.out ql/src/test/results/clientpositive/insert_into4.q.out index ee445c6..228b2f7 100644 --- ql/src/test/results/clientpositive/insert_into4.q.out +++ ql/src/test/results/clientpositive/insert_into4.q.out @@ -20,9 +20,6 @@ PREHOOK: query: EXPLAIN INSERT INTO TABLE insert_into4a SELECT * FROM src LIMIT PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN INSERT INTO TABLE insert_into4a SELECT * FROM src LIMIT 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert_into4a))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -31,39 +28,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -114,9 +106,6 @@ POSTHOOK: query: EXPLAIN INSERT INTO TABLE insert_into4a SELECT * FROM src LIMIT POSTHOOK: type: QUERY POSTHOOK: Lineage: insert_into4a.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: insert_into4a.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert_into4a))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -125,39 +114,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -218,9 +202,6 @@ POSTHOOK: Lineage: insert_into4a.key EXPRESSION [(src)src.FieldSchema(name:key, POSTHOOK: Lineage: insert_into4a.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: insert_into4a.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: insert_into4a.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME insert_into4a))) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert_into4b))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -234,20 +215,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - insert_into4a + Map Operator Tree: TableScan alias: insert_into4a + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -278,12 +256,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -292,12 +268,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/insert_into5.q.out ql/src/test/results/clientpositive/insert_into5.q.out index 6f56a1f..34600b9 100644 --- ql/src/test/results/clientpositive/insert_into5.q.out +++ ql/src/test/results/clientpositive/insert_into5.q.out @@ -20,9 +20,6 @@ PREHOOK: query: EXPLAIN INSERT INTO TABLE insert_into5a SELECT 1, 'one' FROM src PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN INSERT INTO TABLE insert_into5a SELECT 1, 'one' FROM src LIMIT 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert_into5a))) (TOK_SELECT (TOK_SELEXPR 1) (TOK_SELEXPR 'one')) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -31,32 +28,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: 1 - type: int - expr: 'one' - type: string + expressions: 1 (type: int), 'one' (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 10 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 10 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -107,9 +102,6 @@ POSTHOOK: query: EXPLAIN INSERT INTO TABLE insert_into5a SELECT * FROM insert_in POSTHOOK: type: QUERY POSTHOOK: Lineage: insert_into5a.key SIMPLE [] POSTHOOK: Lineage: insert_into5a.value SIMPLE [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME insert_into5a))) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert_into5a))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -123,20 +115,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - insert_into5a + Map Operator Tree: TableScan alias: insert_into5a + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -167,12 +156,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -181,12 +168,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -238,9 +223,6 @@ POSTHOOK: Lineage: insert_into5a.key SIMPLE [] POSTHOOK: Lineage: insert_into5a.key SIMPLE [(insert_into5a)insert_into5a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: insert_into5a.value SIMPLE [] POSTHOOK: Lineage: insert_into5a.value SIMPLE [(insert_into5a)insert_into5a.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME insert_into5a))) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert_into5b) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -254,20 +236,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - insert_into5a + Map Operator Tree: TableScan alias: insert_into5a + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -300,12 +279,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -314,12 +291,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -379,9 +354,6 @@ POSTHOOK: Lineage: insert_into5a.value SIMPLE [] POSTHOOK: Lineage: insert_into5a.value SIMPLE [(insert_into5a)insert_into5a.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: insert_into5b PARTITION(ds=1).key SIMPLE [(insert_into5a)insert_into5a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: insert_into5b PARTITION(ds=1).value SIMPLE [(insert_into5a)insert_into5a.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME insert_into5b))) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert_into5b) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -395,20 +367,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - insert_into5b + Map Operator Tree: TableScan alias: insert_into5b + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -441,12 +410,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -455,12 +422,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/insert_into6.q.out ql/src/test/results/clientpositive/insert_into6.q.out index e30fca1..07316a0 100644 --- ql/src/test/results/clientpositive/insert_into6.q.out +++ ql/src/test/results/clientpositive/insert_into6.q.out @@ -22,9 +22,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN INSERT INTO TABLE insert_into6a PARTITION (ds='1') SELECT * FROM src LIMIT 150 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert_into6a) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 150))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -33,39 +30,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 150 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 150 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -140,9 +132,6 @@ POSTHOOK: Lineage: insert_into6a PARTITION(ds=1).key EXPRESSION [(src)src.FieldS POSTHOOK: Lineage: insert_into6a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: insert_into6a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: insert_into6a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME insert_into6a))) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert_into6b) (TOK_PARTSPEC (TOK_PARTVAL ds)))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -156,22 +145,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - insert_into6a + Map Operator Tree: TableScan alias: insert_into6a + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -204,12 +188,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -218,12 +200,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/keyword_1.q.out ql/src/test/results/clientpositive/keyword_1.q.out index b786da8..7199d20 100644 --- ql/src/test/results/clientpositive/keyword_1.q.out +++ ql/src/test/results/clientpositive/keyword_1.q.out @@ -17,9 +17,6 @@ PREHOOK: query: explain select user from test_user PREHOOK: type: QUERY POSTHOOK: query: explain select user from test_user POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_user))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL user))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -27,18 +24,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test_user + Map Operator Tree: TableScan alias: test_user + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: user - type: string + expressions: user (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -82,9 +78,6 @@ PREHOOK: query: explain select role from test_user PREHOOK: type: QUERY POSTHOOK: query: explain select role from test_user POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_user))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL role))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -92,18 +85,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test_user + Map Operator Tree: TableScan alias: test_user + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: role - type: string + expressions: role (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/lateral_view.q.out ql/src/test/results/clientpositive/lateral_view.q.out index ab9846d..119ddf5 100644 --- ql/src/test/results/clientpositive/lateral_view.q.out +++ ql/src/test/results/clientpositive/lateral_view.q.out @@ -17,9 +17,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol SORT BY key ASC, myCol ASC LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Lineage: tmp_pyang_lv.inputs SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol (TOK_TABALIAS myTable))) (TOK_TABREF (TOK_TABNAME src)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL myCol))) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -28,83 +25,55 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: int + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col2 - type: int + key expressions: _col0 (type: string), _col2 (type: int) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: int + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) Select Operator - expressions: - expr: array(1,2,3) - type: array + expressions: array(1,2,3) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: int + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col2 - type: int + key expressions: _col0 (type: string), _col2 (type: int) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: int + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -112,30 +81,22 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col2 - type: int + key expressions: _col0 (type: string), _col2 (type: int) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: int + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -150,9 +111,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT myTable.* FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LIMIT 3 POSTHOOK: type: QUERY POSTHOOK: Lineage: tmp_pyang_lv.inputs SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol (TOK_TABALIAS myTable))) (TOK_TABREF (TOK_TABNAME src)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME myTable)))) (TOK_LIMIT 3))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -160,45 +118,51 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Forward + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: int + expressions: _col4 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 3 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array(1,2,3) - type: array + expressions: array(1,2,3) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: int + expressions: _col4 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 3 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -213,9 +177,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT myTable.myCol, myTable2.myCol2 FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array('a', 'b', 'c')) myTable2 AS myCol2 LIMIT 9 POSTHOOK: type: QUERY POSTHOOK: Lineage: tmp_pyang_lv.inputs SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 'a' 'b' 'c')) myCol2 (TOK_TABALIAS myTable2))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol (TOK_TABALIAS myTable))) (TOK_TABREF (TOK_TABNAME src))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL myTable) myCol)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL myTable2) myCol2))) (TOK_LIMIT 9))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -223,113 +184,117 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Forward + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Forward + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: int + expressions: _col4 (type: int) outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4, _col5 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 9 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array('a','b','c') - type: array + expressions: array('a','b','c') (type: array) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col4, _col5 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 9 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array(1,2,3) - type: array + expressions: array(1,2,3) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Forward + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: int + expressions: _col4 (type: int) outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4, _col5 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 9 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array('a','b','c') - type: array + expressions: array('a','b','c') (type: array) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col4, _col5 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 9 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -344,9 +309,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT myTable2.* FROM src LATERAL VIEW explode(array(array(1,2,3))) myTable AS myCol LATERAL VIEW explode(myTable.myCol) myTable2 AS myCol2 LIMIT 3 POSTHOOK: type: QUERY POSTHOOK: Lineage: tmp_pyang_lv.inputs SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (. (TOK_TABLE_OR_COL myTable) myCol)) myCol2 (TOK_TABALIAS myTable2))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (TOK_FUNCTION array 1 2 3))) myCol (TOK_TABALIAS myTable))) (TOK_TABREF (TOK_TABNAME src))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME myTable2)))) (TOK_LIMIT 3))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -354,97 +316,113 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Forward + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Forward + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col5 - type: int + expressions: _col5 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 3 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: _col4 - type: array + expressions: _col4 (type: array) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col5 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col5 - type: int + expressions: _col5 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 3 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array(array(1,2,3)) - type: array> + expressions: array(array(1,2,3)) (type: array>) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col4 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Forward + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col5 - type: int + expressions: _col5 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 3 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: _col4 - type: array + expressions: _col4 (type: array) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col5 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col5 - type: int + expressions: _col5 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 3 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -521,9 +499,6 @@ POSTHOOK: query: EXPLAIN SELECT myCol from tmp_pyang_lv LATERAL VIEW explode(array(1,2,3)) myTab as myCol limit 3 POSTHOOK: type: QUERY POSTHOOK: Lineage: tmp_pyang_lv.inputs SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol (TOK_TABALIAS myTab))) (TOK_TABREF (TOK_TABNAME tmp_pyang_lv)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL myCol))) (TOK_LIMIT 3))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -531,45 +506,51 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp_pyang_lv + Map Operator Tree: TableScan alias: tmp_pyang_lv + Statistics: Num rows: 500 Data size: 1406 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Forward + Statistics: Num rows: 500 Data size: 1406 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col3 + Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col3 - type: int + expressions: _col3 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array(1,2,3) - type: array + expressions: array(1,2,3) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col3 + Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col3 - type: int + expressions: _col3 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -821,9 +802,6 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: tmp_pyang_lv.inputs SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp_pyang_src_rcfile.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp_pyang_src_rcfile.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_TABLE_OR_COL value)) myCol (TOK_TABALIAS myTable))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp_pyang_src_rcfile))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION array ([ (TOK_TABLE_OR_COL value) 0)) value)) (TOK_GROUPBY ([ (TOK_TABLE_OR_COL value) 0) (TOK_TABLE_OR_COL key)))) a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL myCol))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -831,95 +809,71 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:tmp_pyang_src_rcfile + Map Operator Tree: TableScan alias: tmp_pyang_src_rcfile + Statistics: Num rows: 20 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: array - expr: key - type: string + expressions: value (type: array), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 20 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: value[0] - type: string - expr: key - type: string + keys: value[0] (type: string), key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 20 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 92 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: array(_col0) - type: array + expressions: array(_col0) (type: array) outputColumnNames: _col1 + Statistics: Num rows: 10 Data size: 92 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 10 Data size: 92 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: array + expressions: _col1 (type: array) outputColumnNames: _col1 + Statistics: Num rows: 10 Data size: 92 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col1, _col2 + Statistics: Num rows: 20 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: array - expr: _col2 - type: string + expressions: _col1 (type: array), _col2 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 184 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 184 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: _col1 - type: array + expressions: _col1 (type: array) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 92 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 10 Data size: 92 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col1, _col2 + Statistics: Num rows: 20 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: array - expr: _col2 - type: string + expressions: _col1 (type: array), _col2 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 184 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 184 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/lateral_view_cp.q.out ql/src/test/results/clientpositive/lateral_view_cp.q.out index 78fefbd..c3f7508 100644 --- ql/src/test/results/clientpositive/lateral_view_cp.q.out +++ ql/src/test/results/clientpositive/lateral_view_cp.q.out @@ -27,9 +27,6 @@ explain select count(val) from (select a.key as key, b.value as array_val from s POSTHOOK: type: QUERY POSTHOOK: Lineage: array_valued_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: array_valued_src.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_TABLE_OR_COL array_val)) val (TOK_TABALIAS c))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME array_valued_src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) array_val)))) i))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL val)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -38,34 +35,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - i:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - i:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: value - type: array + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: array) Reduce Operator Tree: Join Operator condition map: @@ -73,58 +60,55 @@ STAGE PLANS: condition expressions: 0 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col5 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col5 - type: array + expressions: _col5 (type: array) outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string + expressions: _col2 (type: string) outputColumnNames: _col2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(_col2) - bucketGroup: false + aggregations: count(_col2) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Select Operator - expressions: - expr: _col1 - type: array + expressions: _col1 (type: array) outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string + expressions: _col2 (type: string) outputColumnNames: _col2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(_col2) - bucketGroup: false + aggregations: count(_col2) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -132,30 +116,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/lateral_view_noalias.q.out ql/src/test/results/clientpositive/lateral_view_noalias.q.out index 6e6e496..c8cd5f2 100644 --- ql/src/test/results/clientpositive/lateral_view_noalias.q.out +++ ql/src/test/results/clientpositive/lateral_view_noalias.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: --HIVE-2608 Do not require AS a,b,c part in LATERAL VIEW EXPLAIN SELECT myTab.* from src LATERAL VIEW explode(map('key1', 100, 'key2', 200)) myTab limit 2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION map 'key1' 100 'key2' 200)) (TOK_TABALIAS myTab))) (TOK_TABREF (TOK_TABNAME src)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME myTab)))) (TOK_LIMIT 2))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,49 +11,51 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Forward + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4, _col5 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: string - expr: _col5 - type: int + expressions: _col4 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: map('key1':100,'key2':200) - type: map + expressions: map('key1':100,'key2':200) (type: map) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col4, _col5 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: string - expr: _col5 - type: int + expressions: _col4 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -80,9 +79,6 @@ PREHOOK: query: EXPLAIN SELECT explode(map('key1', 100, 'key2', 200)) from src l PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT explode(map('key1', 100, 'key2', 200)) from src limit 2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION map 'key1' 100 'key2' 200)))) (TOK_LIMIT 2))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -90,21 +86,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: map('key1':100,'key2':200) - type: map + expressions: map('key1':100,'key2':200) (type: map) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -135,9 +133,6 @@ PREHOOK: query: explain select * from lv_noalias a join lv_noalias b on a.key=b. PREHOOK: type: QUERY POSTHOOK: query: explain select * from lv_noalias a join lv_noalias b on a.key=b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME lv_noalias) a) (TOK_TABREF (TOK_TABNAME lv_noalias) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -147,61 +142,57 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Forward + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4, _col5 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: string - expr: _col5 - type: int + expressions: _col4 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: int) Select Operator - expressions: - expr: map('key1':100,'key2':200) - type: map + expressions: map('key1':100,'key2':200) (type: map) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col4, _col5 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: string - expr: _col5 - type: int + expressions: _col4 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -209,39 +200,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: int) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -249,22 +222,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 12786 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col3 - type: int + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 12786 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 12786 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -272,61 +238,57 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - b:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Forward + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4, _col5 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: string - expr: _col5 - type: int + expressions: _col4 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: int) Select Operator - expressions: - expr: map('key1':100,'key2':200) - type: map + expressions: map('key1':100,'key2':200) (type: map) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Lateral View Join Operator outputColumnNames: _col4, _col5 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: string - expr: _col5 - type: int + expressions: _col4 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/lateral_view_outer.q.out ql/src/test/results/clientpositive/lateral_view_outer.q.out index 6d301ef..13335ef 100644 --- ql/src/test/results/clientpositive/lateral_view_outer.q.out +++ ql/src/test/results/clientpositive/lateral_view_outer.q.out @@ -6,9 +6,6 @@ POSTHOOK: query: -- UDTF forwards nothing, OUTER LV add null for that explain select * from src LATERAL VIEW OUTER explode(array()) C AS a limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW_OUTER (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array)) a (TOK_TABALIAS C))) (TOK_TABREF (TOK_TABNAME src)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -16,60 +13,54 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array() - type: array + expressions: array() (type: array) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE function name: explode outer lateral view: true Lateral View Join Operator outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -105,9 +96,6 @@ POSTHOOK: query: -- backward compatible (UDTF forwards something for OUTER LV) explain select * from src LATERAL VIEW OUTER explode(array(4,5)) C AS a limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW_OUTER (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 4 5)) a (TOK_TABALIAS C))) (TOK_TABREF (TOK_TABNAME src)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -115,60 +103,54 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: int + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array(4,5) - type: array + expressions: array(4,5) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE function name: explode outer lateral view: true Lateral View Join Operator outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: int + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -209,9 +191,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW_OUTER (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_TABLE_OR_COL value)) a (TOK_TABALIAS C))) (TOK_TABREF (TOK_TABNAME array_valued)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -219,60 +198,54 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - array_valued + Map Operator Tree: TableScan alias: array_valued + Statistics: Num rows: 500 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 500 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: array + expressions: key (type: string), value (type: array) outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 1000 Data size: 11220 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: array - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: array), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 11220 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 110 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 110 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: value - type: array + expressions: value (type: array) outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5610 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 500 Data size: 5610 Basic stats: COMPLETE Column stats: NONE function name: explode outer lateral view: true Lateral View Join Operator outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 1000 Data size: 11220 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: array - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: array), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 11220 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 110 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 110 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/lateral_view_ppd.q.out ql/src/test/results/clientpositive/lateral_view_ppd.q.out index f54c809..da77f75 100644 --- ql/src/test/results/clientpositive/lateral_view_ppd.q.out +++ ql/src/test/results/clientpositive/lateral_view_ppd.q.out @@ -2,9 +2,6 @@ PREHOOK: query: EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE key='0' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol (TOK_TABALIAS myTable))) (TOK_TABREF (TOK_TABNAME src)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL myCol))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) '0')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -12,55 +9,50 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = '0') - type: boolean + predicate: (key = '0') (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col1, _col4 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col4 - type: int + expressions: _col1 (type: string), _col4 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array(1,2,3) - type: array + expressions: array(1,2,3) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col1, _col4 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col4 - type: int + expressions: _col1 (type: string), _col4 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -91,9 +83,6 @@ PREHOOK: query: EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE key='0' AND myCol=1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol (TOK_TABALIAS myTable))) (TOK_TABREF (TOK_TABNAME src)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL myCol))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL key) '0') (= (TOK_TABLE_OR_COL myCol) 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -101,65 +90,56 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = '0') - type: boolean + predicate: (key = '0') (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((_col0 = '0') and (_col4 = 1)) - type: boolean + predicate: ((_col0 = '0') and (_col4 = 1)) (type: boolean) + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col4 - type: int + expressions: _col1 (type: string), _col4 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array(1,2,3) - type: array + expressions: array(1,2,3) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((_col0 = '0') and (_col4 = 1)) - type: boolean + predicate: ((_col0 = '0') and (_col4 = 1)) (type: boolean) + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col4 - type: int + expressions: _col1 (type: string), _col4 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -184,9 +164,6 @@ PREHOOK: query: EXPLAIN SELECT value, myCol FROM (SELECT * FROM srcpart LATERAL PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT value, myCol FROM (SELECT * FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE ds='2008-04-08' AND hr="12" LIMIT 12 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol (TOK_TABALIAS myTable))) (TOK_TABREF (TOK_TABNAME srcpart)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL myCol))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) "12"))) (TOK_LIMIT 12))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -194,53 +171,53 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col1, _col6 + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col6 - type: int + expressions: _col1 (type: string), _col6 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 12 + Statistics: Num rows: 12 Data size: 1200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 1200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array(1,2,3) - type: array + expressions: array(1,2,3) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col1, _col6 + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col6 - type: int + expressions: _col1 (type: string), _col6 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 12 + Statistics: Num rows: 12 Data size: 1200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 1200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -276,9 +253,6 @@ PREHOOK: query: EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array(1,2,3)) myTable2 AS myCol2) a WHERE key='0' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol2 (TOK_TABALIAS myTable2))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol (TOK_TABALIAS myTable))) (TOK_TABREF (TOK_TABNAME src))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL myCol))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) '0')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -286,121 +260,110 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = '0') - type: boolean + predicate: (key = '0') (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col1, _col4 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col4 - type: int + expressions: _col1 (type: string), _col4 (type: int) outputColumnNames: _col1, _col4 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col1, _col4, _col5 + Statistics: Num rows: 56 Data size: 11220 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col4 - type: int + expressions: _col1 (type: string), _col4 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 56 Data size: 11220 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 56 Data size: 11220 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array(1,2,3) - type: array + expressions: array(1,2,3) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col1, _col4, _col5 + Statistics: Num rows: 56 Data size: 11220 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col4 - type: int + expressions: _col1 (type: string), _col4 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 56 Data size: 11220 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 56 Data size: 11220 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array(1,2,3) - type: array + expressions: array(1,2,3) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col1, _col4 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col4 - type: int + expressions: _col1 (type: string), _col4 (type: int) outputColumnNames: _col1, _col4 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col1, _col4, _col5 + Statistics: Num rows: 56 Data size: 11220 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col4 - type: int + expressions: _col1 (type: string), _col4 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 56 Data size: 11220 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 56 Data size: 11220 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array(1,2,3) - type: array + expressions: array(1,2,3) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col1, _col4, _col5 + Statistics: Num rows: 56 Data size: 11220 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col4 - type: int + expressions: _col1 (type: string), _col4 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 56 Data size: 11220 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 56 Data size: 11220 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/limit_pushdown.q.out ql/src/test/results/clientpositive/limit_pushdown.q.out index 7e6592d..a8add4c 100644 --- ql/src/test/results/clientpositive/limit_pushdown.q.out +++ ql/src/test/results/clientpositive/limit_pushdown.q.out @@ -8,9 +8,6 @@ POSTHOOK: query: -- HIVE-3562 Some limit can be pushed down to map stage explain select key,value from src order by key limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -18,36 +15,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - TopN: 20 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -91,9 +81,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select key,value from src order by key desc limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL key))) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -101,36 +88,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: - - tag: -1 - TopN: 20 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -174,9 +154,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select value, sum(key + 1) as sum from src group by value limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION sum (+ (TOK_TABLE_OR_COL key) 1)) sum)) (TOK_GROUPBY (TOK_TABLE_OR_COL value)) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -184,61 +161,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum((key + 1)) - bucketGroup: false - keys: - expr: value - type: string + aggregations: sum((key + 1)) + keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - TopN: 20 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: - expr: _col1 - type: double + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -284,9 +244,6 @@ POSTHOOK: query: -- deduped RS explain select value,avg(key + 1) from src group by value order by value limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION avg (+ (TOK_TABLE_OR_COL key) 1)))) (TOK_GROUPBY (TOK_TABLE_OR_COL value)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -294,61 +251,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: avg((key + 1)) - bucketGroup: false - keys: - expr: value - type: string + aggregations: avg((key + 1)) + keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - TopN: 20 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: - expr: _col1 - type: struct + value expressions: _col1 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: avg(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -394,9 +334,6 @@ POSTHOOK: query: -- distincts explain select distinct(cdouble) from alltypesorc limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL cdouble))) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -404,50 +341,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 47154 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: cdouble - type: double + expressions: cdouble (type: double) outputColumnNames: cdouble + Statistics: Num rows: 47154 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: cdouble - type: double + keys: cdouble (type: double) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 47154 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - Map-reduce partition columns: - expr: _col0 - type: double - tag: -1 - TopN: 20 + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 47154 Data size: 377237 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: double + keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 23577 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double + expressions: _col0 (type: double) outputColumnNames: _col0 + Statistics: Num rows: 23577 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -491,9 +419,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ctinyint)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL cdouble)))) (TOK_GROUPBY (TOK_TABLE_OR_COL ctinyint)) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -501,65 +426,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: ctinyint - type: tinyint - expr: cdouble - type: double + expressions: ctinyint (type: tinyint), cdouble (type: double) outputColumnNames: ctinyint, cdouble + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT cdouble) - bucketGroup: false - keys: - expr: ctinyint - type: tinyint - expr: cdouble - type: double + aggregations: count(DISTINCT cdouble) + keys: ctinyint (type: tinyint), cdouble (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: double + key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: tinyint - tag: -1 - TopN: 20 + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: - expr: _col2 - type: bigint + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: tinyint + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15718 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: bigint + expressions: _col0 (type: tinyint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15718 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -605,9 +509,6 @@ POSTHOOK: query: -- multi distinct explain select ctinyint, count(distinct(cstring1)), count(distinct(cstring2)) from alltypesorc group by ctinyint limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ctinyint)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL cstring1))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL cstring2)))) (TOK_GROUPBY (TOK_TABLE_OR_COL ctinyint)) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -615,77 +516,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 1849 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: ctinyint - type: tinyint - expr: cstring1 - type: string - expr: cstring2 - type: string + expressions: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string) outputColumnNames: ctinyint, cstring1, cstring2 + Statistics: Num rows: 1849 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT cstring1) - expr: count(DISTINCT cstring2) - bucketGroup: false - keys: - expr: ctinyint - type: tinyint - expr: cstring1 - type: string - expr: cstring2 - type: string + aggregations: count(DISTINCT cstring1), count(DISTINCT cstring2) + keys: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1849 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: tinyint - tag: -1 - TopN: 20 + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 1849 Data size: 377237 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: - expr: _col3 - type: bigint - expr: _col4 - type: bigint + value expressions: _col3 (type: bigint), _col4 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - expr: count(DISTINCT KEY._col1:1._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: tinyint + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 924 Data size: 188516 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + expressions: _col0 (type: tinyint), _col1 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 924 Data size: 188516 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 4080 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 4080 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -731,9 +599,6 @@ POSTHOOK: query: -- limit zero explain select key,value from src order by key limit 0 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))) (TOK_LIMIT 0))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -741,34 +606,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -794,9 +653,6 @@ POSTHOOK: query: -- 2MR (applied to last RS) explain select value, sum(key) as sum from src group by value order by sum limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key)) sum)) (TOK_GROUPBY (TOK_TABLE_OR_COL value)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL sum))) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -805,58 +661,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(key) - bucketGroup: false - keys: - expr: value - type: string + aggregations: sum(key) + keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -864,28 +701,23 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: double + key expressions: _col1 (type: double) sort order: + - tag: -1 - TopN: 20 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: double + value expressions: _col0 (type: string), _col1 (type: double) Reduce Operator Tree: Extract + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -939,9 +771,6 @@ join (select key, count(1) from src group by key limit 3) subq2 on subq.key=subq2.key limit 4 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))) (TOK_LIMIT 2))) subq) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_LIMIT 3))) subq2) (= (. (TOK_TABLE_OR_COL subq) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 4))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -952,59 +781,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - TopN: 2 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: - expr: _col1 - type: bigint + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1012,39 +825,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -1052,23 +847,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 330 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 330 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 4 + Statistics: Num rows: 3 Data size: 330 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 330 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1076,59 +866,43 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - subq2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - TopN: 3 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: - expr: _col1 - type: bigint + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1136,25 +910,21 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - TopN: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1172,9 +942,6 @@ POSTHOOK: query: -- map aggregation disabled explain select value, sum(key) as sum from src group by value limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key)) sum)) (TOK_GROUPBY (TOK_TABLE_OR_COL value)) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1182,52 +949,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: value (type: string) sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: -1 - TopN: 20 + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: - expr: key - type: string + value expressions: key (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1273,9 +1026,6 @@ POSTHOOK: query: -- flush for order-by explain select key,value,value,value,value,value,value,value,value from src order by key limit 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))) (TOK_LIMIT 100))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1283,64 +1033,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: value - type: string - expr: value - type: string - expr: value - type: string - expr: value - type: string - expr: value - type: string - expr: value - type: string - expr: value - type: string + expressions: key (type: string), value (type: string), value (type: string), value (type: string), value (type: string), value (type: string), value (type: string), value (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - TopN: 100 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 2.0E-5 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col8 - type: string + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1466,9 +1181,6 @@ POSTHOOK: query: -- flush for group-by explain select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) limit 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key)) sum)) (TOK_GROUPBY (TOK_FUNCTION concat (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL value))) (TOK_LIMIT 100))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1476,50 +1188,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: concat(key, value, value, value, value, value, value, value, value, value) - type: string + key expressions: concat(key, value, value, value, value, value, value, value, value, value) (type: string) sort order: + - Map-reduce partition columns: - expr: concat(key, value, value, value, value, value, value, value, value, value) - type: string - tag: -1 - TopN: 100 + Map-reduce partition columns: concat(key, value, value, value, value, value, value, value, value, value) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 2.0E-5 - value expressions: - expr: key - type: string + value expressions: key (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: double + expressions: _col1 (type: double) outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/limit_pushdown_negative.q.out ql/src/test/results/clientpositive/limit_pushdown_negative.q.out index 1149720..5176031 100644 --- ql/src/test/results/clientpositive/limit_pushdown_negative.q.out +++ ql/src/test/results/clientpositive/limit_pushdown_negative.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- negative, RS + join explain select * from src a join src b on a.key=b.key limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,41 +11,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -56,23 +37,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 4120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 4120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -88,9 +64,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- negative, RS + filter explain select value, sum(key) as sum from src group by value having sum > 100 limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key)) sum)) (TOK_GROUPBY (TOK_TABLE_OR_COL value)) (TOK_HAVING (> (TOK_TABLE_OR_COL sum) 100)) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -98,63 +71,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(key) - bucketGroup: false - keys: - expr: value - type: string + aggregations: sum(key) + keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col1 > 100.0) - type: boolean + predicate: (_col1 > 100.0) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -170,9 +126,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- negative, RS + lateral view explain select key, L.* from (select * from src order by key) a lateral view explode(array(value, value)) L as v limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL value))) v (TOK_TABALIAS L))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME L)))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -180,73 +133,65 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col2 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: array(_col1,_col1) - type: array + expressions: array(_col1,_col1) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col2 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -276,9 +221,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest_2 SELECT value, sum(key) GROUP BY value INSERT OVERWRITE TABLE dest_3 SELECT value, sum(key) GROUP BY value limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL value)) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -290,75 +232,56 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: value (type: string) sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: -1 - value expressions: - expr: key - type: string + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_2 Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -379,32 +302,26 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - TopN: 20 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: double + value expressions: _col0 (type: string), _col1 (type: double) Reduce Operator Tree: Extract + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/lineage1.q.out ql/src/test/results/clientpositive/lineage1.q.out index d5bff2a..2b32507 100644 --- ql/src/test/results/clientpositive/lineage1.q.out +++ ql/src/test/results/clientpositive/lineage1.q.out @@ -29,9 +29,6 @@ FROM (SELECT t1.key, p1.value LEFT OUTER JOIN src p2 ON (t2.key = p2.key)) j POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1) t1) (TOK_TABREF (TOK_TABNAME src) p1) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL p1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL p1) value))))) (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1) t2) (TOK_TABREF (TOK_TABNAME src) p2) (= (. (TOK_TABLE_OR_COL t2) key) (. (TOK_TABLE_OR_COL p2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t2) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL p2) value)))))) j)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_l1))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME j)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-9 @@ -47,37 +44,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:j-subquery1:p1 + Map Operator Tree: TableScan - alias: p1 + alias: t1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: value - type: string - null-subquery1:j-subquery1:t1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: t1 + alias: p1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -85,18 +70,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -104,38 +85,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 62 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 62 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 62 Data size: 12786 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_l1 -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 62 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 62 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 62 Data size: 12786 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -166,12 +141,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -180,12 +153,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -200,37 +171,25 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:j-subquery2:p2 + Map Operator Tree: TableScan alias: p2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: value - type: string - null-subquery2:j-subquery2:t2 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan alias: t2 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -238,18 +197,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/list_bucket_dml_10.q.out ql/src/test/results/clientpositive/list_bucket_dml_10.q.out index 56df656..3f2bb39 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_10.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_10.q.out @@ -50,7 +50,32 @@ insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', select key, value from src POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME list_bucketing_static_part) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr '11')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + list_bucketing_static_part + TOK_PARTSPEC + TOK_PARTVAL + ds + '2008-04-08' + TOK_PARTVAL + hr + '11' + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -65,30 +90,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat diff --git ql/src/test/results/clientpositive/literal_decimal.q.out ql/src/test/results/clientpositive/literal_decimal.q.out index efc5fc1..2f2df6a 100644 --- ql/src/test/results/clientpositive/literal_decimal.q.out +++ ql/src/test/results/clientpositive/literal_decimal.q.out @@ -2,9 +2,6 @@ PREHOOK: query: EXPLAIN SELECT -1BD, 0BD, 1BD, 3.14BD, -3.14BD, 9999999999999999 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT -1BD, 0BD, 1BD, 3.14BD, -3.14BD, 99999999999999999BD, 99999999999999999.9999999999999BD, 1E-99BD, 1E99BD FROM src LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (- 1BD)) (TOK_SELEXPR 0BD) (TOK_SELEXPR 1BD) (TOK_SELEXPR 3.14BD) (TOK_SELEXPR (- 3.14BD)) (TOK_SELEXPR 99999999999999999BD) (TOK_SELEXPR 99999999999999999.9999999999999BD) (TOK_SELEXPR 1E-99BD) (TOK_SELEXPR 1E99BD)) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -15,28 +12,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: (- 1) - type: decimal(1,0) - expr: 0 - type: decimal(1,0) - expr: 1 - type: decimal(1,0) - expr: 3.14 - type: decimal(3,2) - expr: (- 3.14) - type: decimal(3,2) - expr: 99999999999999999 - type: decimal(17,0) - expr: 99999999999999999.9999999999999 - type: decimal(30,13) - expr: 1E-99 - type: decimal(1,0) - expr: 1E99 - type: decimal(1,0) + expressions: (- 1) (type: decimal(1,0)), 0 (type: decimal(1,0)), 1 (type: decimal(1,0)), 3.14 (type: decimal(3,2)), (- 3.14) (type: decimal(3,2)), 99999999999999999 (type: decimal(17,0)), 99999999999999999.9999999999999 (type: decimal(30,13)), 1E-99 (type: decimal(1,0)), 1E99 (type: decimal(1,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT -1BD, 0BD, 1BD, 3.14BD, -3.14BD, 99999999999999999BD, 99999999999999999.9999999999999BD, 1E-99BD, 1E99BD FROM src LIMIT 1 diff --git ql/src/test/results/clientpositive/literal_double.q.out ql/src/test/results/clientpositive/literal_double.q.out index c09cc3c..b3f51f4 100644 --- ql/src/test/results/clientpositive/literal_double.q.out +++ ql/src/test/results/clientpositive/literal_double.q.out @@ -2,9 +2,6 @@ PREHOOK: query: EXPLAIN SELECT 3.14, -3.14, 3.14e8, 3.14e-8, -3.14e8, -3.14e-8, PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT 3.14, -3.14, 3.14e8, 3.14e-8, -3.14e8, -3.14e-8, 3.14e+8, 3.14E8, 3.14E-8 FROM src LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 3.14) (TOK_SELEXPR (- 3.14)) (TOK_SELEXPR 3.14e8) (TOK_SELEXPR 3.14e-8) (TOK_SELEXPR (- 3.14e8)) (TOK_SELEXPR (- 3.14e-8)) (TOK_SELEXPR 3.14e+8) (TOK_SELEXPR 3.14E8) (TOK_SELEXPR 3.14E-8)) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -15,28 +12,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: 3.14 - type: double - expr: (- 3.14) - type: double - expr: 3.14E8 - type: double - expr: 3.14E-8 - type: double - expr: (- 3.14E8) - type: double - expr: (- 3.14E-8) - type: double - expr: 3.14E8 - type: double - expr: 3.14E8 - type: double - expr: 3.14E-8 - type: double + expressions: 3.14 (type: double), (- 3.14) (type: double), 3.14E8 (type: double), 3.14E-8 (type: double), (- 3.14E8) (type: double), (- 3.14E-8) (type: double), 3.14E8 (type: double), 3.14E8 (type: double), 3.14E-8 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT 3.14, -3.14, 3.14e8, 3.14e-8, -3.14e8, -3.14e-8, 3.14e+8, 3.14E8, 3.14E-8 FROM src LIMIT 1 diff --git ql/src/test/results/clientpositive/literal_ints.q.out ql/src/test/results/clientpositive/literal_ints.q.out index d7eaa0d..da5b43e 100644 --- ql/src/test/results/clientpositive/literal_ints.q.out +++ ql/src/test/results/clientpositive/literal_ints.q.out @@ -2,9 +2,6 @@ PREHOOK: query: EXPLAIN SELECT 100, 100Y, 100S, 100L FROM src LIMIT 1 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT 100, 100Y, 100S, 100L FROM src LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 100) (TOK_SELEXPR 100Y) (TOK_SELEXPR 100S) (TOK_SELEXPR 100L)) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -15,18 +12,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: 100 - type: int - expr: 100 - type: tinyint - expr: 100 - type: smallint - expr: 100 - type: bigint + expressions: 100 (type: int), 100 (type: tinyint), 100 (type: smallint), 100 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT 100, 100Y, 100S, 100L FROM src LIMIT 1 diff --git ql/src/test/results/clientpositive/literal_string.q.out ql/src/test/results/clientpositive/literal_string.q.out index 1a88704..547024b 100644 --- ql/src/test/results/clientpositive/literal_string.q.out +++ ql/src/test/results/clientpositive/literal_string.q.out @@ -12,9 +12,6 @@ POSTHOOK: query: EXPLAIN SELECT 'face''book', 'face' 'book', 'face' 'face' 'bo' 'ok', 'face'"book", "face"'book', 'facebook' FROM src LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'face''book') (TOK_SELEXPR (TOK_STRINGLITERALSEQUENCE 'face' 'book')) (TOK_SELEXPR (TOK_STRINGLITERALSEQUENCE 'face' 'book')) (TOK_SELEXPR "face""book") (TOK_SELEXPR (TOK_STRINGLITERALSEQUENCE "face" "book")) (TOK_SELEXPR (TOK_STRINGLITERALSEQUENCE "face" "book")) (TOK_SELEXPR (TOK_STRINGLITERALSEQUENCE 'face' 'bo' 'ok')) (TOK_SELEXPR 'face'"book") (TOK_SELEXPR "face"'book') (TOK_SELEXPR 'facebook')) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -25,30 +22,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: 'facebook' - type: string - expr: 'facebook' - type: string - expr: 'facebook' - type: string - expr: 'facebook' - type: string - expr: 'facebook' - type: string - expr: 'facebook' - type: string - expr: 'facebook' - type: string - expr: 'facebook' - type: string - expr: 'facebook' - type: string - expr: 'facebook' - type: string + expressions: 'facebook' (type: string), 'facebook' (type: string), 'facebook' (type: string), 'facebook' (type: string), 'facebook' (type: string), 'facebook' (type: string), 'facebook' (type: string), 'facebook' (type: string), 'facebook' (type: string), 'facebook' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT 'face''book', 'face' 'book', 'face' diff --git ql/src/test/results/clientpositive/load_dyn_part1.q.out ql/src/test/results/clientpositive/load_dyn_part1.q.out index b3e59ba..7c9bf40 100644 --- ql/src/test/results/clientpositive/load_dyn_part1.q.out +++ ql/src/test/results/clientpositive/load_dyn_part1.q.out @@ -42,9 +42,6 @@ from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' insert overwrite table nzhang_part2 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part1) (TOK_PARTSPEC (TOK_PARTVAL ds) (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (<= (TOK_TABLE_OR_COL ds) '2008-04-08'))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part2) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-12-31') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (> (TOK_TABLE_OR_COL ds) '2008-04-08')))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 @@ -65,49 +62,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (ds <= '2008-04-08') - type: boolean + predicate: (ds <= '2008-04-08') (type: boolean) + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 Filter Operator - predicate: - expr: (ds > '2008-04-08') - type: boolean + predicate: (ds > '2008-04-08') (type: boolean) + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -141,12 +124,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -155,12 +136,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -200,12 +179,10 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -214,12 +191,10 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/load_dyn_part10.q.out ql/src/test/results/clientpositive/load_dyn_part10.q.out index ff8d343..57646a1 100644 --- ql/src/test/results/clientpositive/load_dyn_part10.q.out +++ ql/src/test/results/clientpositive/load_dyn_part10.q.out @@ -35,9 +35,6 @@ POSTHOOK: query: explain from srcpart insert overwrite table nzhang_part10 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part10) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-12-31') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (> (TOK_TABLE_OR_COL ds) '2008-04-08')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -46,22 +43,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/load_dyn_part13.q.out ql/src/test/results/clientpositive/load_dyn_part13.q.out index 5013e12..85a4470 100644 --- ql/src/test/results/clientpositive/load_dyn_part13.q.out +++ ql/src/test/results/clientpositive/load_dyn_part13.q.out @@ -49,9 +49,6 @@ select * from ( from src where key > 20 and key < 40) s POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR '22')) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 20)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR '33')) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 20) (< (TOK_TABLE_OR_COL key) 40)))))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part13) (TOK_PARTSPEC (TOK_PARTVAL ds "2010-03-03") (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -60,70 +57,50 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:s-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 20) - type: boolean + predicate: ((key > 20) and (key < 40)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: '22' - type: string + expressions: key (type: string), value (type: string), '33' (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part13 - null-subquery2:s-subquery2:src TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 20) and (key < 40)) - type: boolean + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: '33' - type: string + expressions: key (type: string), value (type: string), '22' (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/load_dyn_part14.q.out ql/src/test/results/clientpositive/load_dyn_part14.q.out index a966b89..a2d53d5 100644 --- ql/src/test/results/clientpositive/load_dyn_part14.q.out +++ ql/src/test/results/clientpositive/load_dyn_part14.q.out @@ -44,9 +44,6 @@ select key, value from ( select 'k3' as key, ' ' as value from src limit 2 ) T POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'k1' key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING TOK_NULL) value)) (TOK_LIMIT 2))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'k2' key) (TOK_SELEXPR '' value)) (TOK_LIMIT 2)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'k3' key) (TOK_SELEXPR ' ' value)) (TOK_LIMIT 2)))) T)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part14) (TOK_PARTSPEC (TOK_PARTVAL value)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-9, Stage-10 @@ -63,32 +60,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery2:t-subquery1-subquery2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: 'k2' - type: string - expr: '' - type: string + expressions: 'k2' (type: string), '' (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -96,56 +90,47 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 17436 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 17436 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 17436 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 17436 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 17436 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 17436 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 17436 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 17436 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 17436 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -178,12 +163,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -192,12 +175,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -212,32 +193,29 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:t-subquery2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: 'k3' - type: string - expr: ' ' - type: string + expressions: 'k3' (type: string), ' ' (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -245,32 +223,29 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery1:t-subquery1-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: 'k1' - type: string - expr: UDFToString(null) - type: string + expressions: 'k1' (type: string), UDFToString(null) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/load_dyn_part2.q.out ql/src/test/results/clientpositive/load_dyn_part2.q.out index b005aac..b46147b 100644 --- ql/src/test/results/clientpositive/load_dyn_part2.q.out +++ ql/src/test/results/clientpositive/load_dyn_part2.q.out @@ -29,9 +29,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table nzhang_part_bucket partition (ds='2010-03-23', hr) select key, value, hr from srcpart where ds is not null and hr is not null POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part_bucket) (TOK_PARTSPEC (TOK_PARTVAL ds '2010-03-23') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (and (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL ds)) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL hr)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -40,37 +37,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/load_dyn_part3.q.out ql/src/test/results/clientpositive/load_dyn_part3.q.out index f0f666f..1771948 100644 --- ql/src/test/results/clientpositive/load_dyn_part3.q.out +++ ql/src/test/results/clientpositive/load_dyn_part3.q.out @@ -33,9 +33,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table nzhang_part3 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part3) (TOK_PARTSPEC (TOK_PARTVAL ds) (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (and (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL ds)) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL hr)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -44,24 +41,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/load_dyn_part4.q.out ql/src/test/results/clientpositive/load_dyn_part4.q.out index b137dcd..ba38782 100644 --- ql/src/test/results/clientpositive/load_dyn_part4.q.out +++ ql/src/test/results/clientpositive/load_dyn_part4.q.out @@ -45,9 +45,6 @@ insert overwrite table nzhang_part4 partition (ds, hr) select key, value, ds, hr POSTHOOK: type: QUERY POSTHOOK: Lineage: nzhang_part4 PARTITION(ds=2008-04-08,hr=existing_value).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: nzhang_part4 PARTITION(ds=2008-04-08,hr=existing_value).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part4) (TOK_PARTSPEC (TOK_PARTVAL ds) (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (and (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL ds)) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL hr)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -56,24 +53,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/load_dyn_part5.q.out ql/src/test/results/clientpositive/load_dyn_part5.q.out index af0a196..8d9871e 100644 --- ql/src/test/results/clientpositive/load_dyn_part5.q.out +++ ql/src/test/results/clientpositive/load_dyn_part5.q.out @@ -22,9 +22,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table nzhang_part5 partition (value) select key, value from src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part5) (TOK_PARTSPEC (TOK_PARTVAL value)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -33,20 +30,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/load_dyn_part8.q.out ql/src/test/results/clientpositive/load_dyn_part8.q.out index 5a302be..968cef1 100644 --- ql/src/test/results/clientpositive/load_dyn_part8.q.out +++ ql/src/test/results/clientpositive/load_dyn_part8.q.out @@ -38,7 +38,67 @@ insert overwrite table nzhang_part8 partition (ds, hr) select key, value, ds, hr insert overwrite table nzhang_part8 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part8) (TOK_PARTSPEC (TOK_PARTVAL ds) (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (<= (TOK_TABLE_OR_COL ds) '2008-04-08'))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part8) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-12-31') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (> (TOK_TABLE_OR_COL ds) '2008-04-08')))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + nzhang_part8 + TOK_PARTSPEC + TOK_PARTVAL + ds + TOK_PARTVAL + hr + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_SELEXPR + TOK_TABLE_OR_COL + hr + TOK_WHERE + <= + TOK_TABLE_OR_COL + ds + '2008-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + nzhang_part8 + TOK_PARTSPEC + TOK_PARTVAL + ds + '2008-12-31' + TOK_PARTVAL + hr + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + hr + TOK_WHERE + > + TOK_TABLE_OR_COL + ds + '2008-04-08' + STAGE DEPENDENCIES: Stage-2 is a root stage @@ -50,40 +110,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart - Statistics: - numRows: 116 dataSize: 23248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (ds <= '2008-04-08') - type: boolean - Statistics: - numRows: 38 dataSize: 7615 basicStatsState: COMPLETE colStatsState: NONE + predicate: (ds <= '2008-04-08') (type: boolean) + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 38 dataSize: 7615 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 38 dataSize: 7615 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -106,30 +151,19 @@ STAGE PLANS: MultiFileSpray: false Filter Operator isSamplingPred: false - predicate: - expr: (ds > '2008-04-08') - type: boolean - Statistics: - numRows: 38 dataSize: 7615 basicStatsState: COMPLETE colStatsState: NONE + predicate: (ds > '2008-04-08') (type: boolean) + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 38 dataSize: 7615 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 2 #### A masked pattern was here #### NumFilesPerFileSink: 1 Static Partition Specification: ds=2008-12-31/ - Statistics: - numRows: 38 dataSize: 7615 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/load_dyn_part9.q.out ql/src/test/results/clientpositive/load_dyn_part9.q.out index bac7a47..67ad1a6 100644 --- ql/src/test/results/clientpositive/load_dyn_part9.q.out +++ ql/src/test/results/clientpositive/load_dyn_part9.q.out @@ -35,9 +35,6 @@ POSTHOOK: query: explain from srcpart insert overwrite table nzhang_part9 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part9) (TOK_PARTSPEC (TOK_PARTVAL ds) (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (<= (TOK_TABLE_OR_COL ds) '2008-04-08')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -46,24 +43,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/louter_join_ppr.q.out ql/src/test/results/clientpositive/louter_join_ppr.q.out index 1393c82..28070f9 100644 --- ql/src/test/results/clientpositive/louter_join_ppr.q.out +++ ql/src/test/results/clientpositive/louter_join_ppr.q.out @@ -17,7 +17,88 @@ POSTHOOK: query: EXPLAIN EXTENDED WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL b) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 10) (< (. (TOK_TABLE_OR_COL a) key) 20)) (> (. (TOK_TABLE_OR_COL b) key) 15)) (< (. (TOK_TABLE_OR_COL b) key) 25))))) + +TOK_QUERY + TOK_FROM + TOK_LEFTOUTERJOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_TABREF + TOK_TABNAME + srcpart + b + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + b + ds + '2008-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -26,65 +107,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + alias: b + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string), value (type: string) TableScan - alias: b - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean - Statistics: - numRows: 6 dataSize: 1202 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 6 dataSize: 1202 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string), value (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -226,37 +279,22 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 - Statistics: - numRows: 6 dataSize: 1322 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 1322 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: ((_col4 > 15) and (_col4 < 25)) - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + predicate: ((_col4 > 15) and (_col4 < 25)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -334,7 +372,88 @@ POSTHOOK: query: EXPLAIN EXTENDED WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME srcpart) a) (TOK_TABREF (TOK_TABNAME src) b) (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 10) (< (. (TOK_TABLE_OR_COL a) key) 20)) (> (. (TOK_TABLE_OR_COL b) key) 15)) (< (. (TOK_TABLE_OR_COL b) key) 25))))) + +TOK_QUERY + TOK_FROM + TOK_LEFTOUTERJOIN + TOK_TABREF + TOK_TABNAME + srcpart + a + TOK_TABREF + TOK_TABNAME + src + b + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + ds + '2008-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -343,67 +462,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a - Statistics: - numRows: 116 dataSize: 23248 basicStatsState: COMPLETE colStatsState: NONE + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean - Statistics: - numRows: 12 dataSize: 2404 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 12 dataSize: 2404 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string), value (type: string) TableScan - alias: b - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + alias: a + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string), value (type: string), ds (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -636,37 +725,22 @@ STAGE PLANS: filter predicates: 0 {(VALUE._col2 = '2008-04-08')} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: - numRows: 13 dataSize: 2644 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 13 Data size: 2644 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: ((_col6 > 15) and (_col6 < 25)) - type: boolean - Statistics: - numRows: 1 dataSize: 203 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((_col6 > 15) and (_col6 < 25)) (type: boolean) + Statistics: Num rows: 1 Data size: 203 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col6 - type: string - expr: _col7 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col7 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 1 dataSize: 203 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 203 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 203 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 203 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -748,7 +822,88 @@ POSTHOOK: query: EXPLAIN EXTENDED WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (AND (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 10) (< (. (TOK_TABLE_OR_COL a) key) 20)) (> (. (TOK_TABLE_OR_COL b) key) 15)) (< (. (TOK_TABLE_OR_COL b) key) 25)) (= (. (TOK_TABLE_OR_COL b) ds) '2008-04-08'))))) + +TOK_QUERY + TOK_FROM + TOK_LEFTOUTERJOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_TABREF + TOK_TABNAME + srcpart + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + = + . + TOK_TABLE_OR_COL + b + ds + '2008-04-08' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -757,67 +912,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + alias: b + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string), value (type: string), ds (type: string) TableScan - alias: b - Statistics: - numRows: 116 dataSize: 23248 basicStatsState: COMPLETE colStatsState: NONE + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean - Statistics: - numRows: 12 dataSize: 2404 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 12 dataSize: 2404 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string), value (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1045,37 +1170,22 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col6 - Statistics: - numRows: 13 dataSize: 2644 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 13 Data size: 2644 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: (((_col4 > 15) and (_col4 < 25)) and (_col6 = '2008-04-08')) - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + predicate: (((_col4 > 15) and (_col4 < 25)) and (_col6 = '2008-04-08')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1157,7 +1267,88 @@ POSTHOOK: query: EXPLAIN EXTENDED WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND a.ds = '2008-04-08' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME srcpart) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (AND (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 10) (< (. (TOK_TABLE_OR_COL a) key) 20)) (> (. (TOK_TABLE_OR_COL b) key) 15)) (< (. (TOK_TABLE_OR_COL b) key) 25)) (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08'))))) + +TOK_QUERY + TOK_FROM + TOK_LEFTOUTERJOIN + TOK_TABREF + TOK_TABNAME + srcpart + a + TOK_TABREF + TOK_TABNAME + src + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + = + . + TOK_TABLE_OR_COL + a + ds + '2008-04-08' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1166,65 +1357,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean - Statistics: - numRows: 6 dataSize: 1202 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 6 dataSize: 1202 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string), value (type: string) TableScan - alias: b - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + alias: a + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string), value (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1366,37 +1529,22 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: - numRows: 6 dataSize: 1322 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 1322 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: ((_col6 > 15) and (_col6 < 25)) - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + predicate: ((_col6 > 15) and (_col6 < 25)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col6 - type: string - expr: _col7 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col7 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/macro.q.out ql/src/test/results/clientpositive/macro.q.out index 2bc004c..c483029 100644 --- ql/src/test/results/clientpositive/macro.q.out +++ ql/src/test/results/clientpositive/macro.q.out @@ -15,9 +15,6 @@ PREHOOK: query: EXPLAIN SELECT SIGMOID(2) FROM src LIMIT 1 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT SIGMOID(2) FROM src LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SIGMOID 2))) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -28,12 +25,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: SIGMOID(2) - type: double + expressions: SIGMOID(2) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT SIGMOID(2) FROM src LIMIT 1 @@ -41,7 +40,24 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED SELECT SIGMOID(2) FROM src LIMIT 1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SIGMOID 2))) (TOK_LIMIT 1))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + SIGMOID + 2 + TOK_LIMIT + 1 + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -53,19 +69,15 @@ STAGE PLANS: Processor Tree: TableScan alias: src - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: SIGMOID(2) - type: double + expressions: SIGMOID(2) (type: double) outputColumnNames: _col0 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: DROP TEMPORARY MACRO SIGMOID @@ -89,9 +101,6 @@ PREHOOK: query: EXPLAIN SELECT FIXED_NUMBER() + 1 FROM src LIMIT 1 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT FIXED_NUMBER() + 1 FROM src LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_FUNCTION FIXED_NUMBER) 1))) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -102,12 +111,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: (FIXED_NUMBER() + 1) - type: int + expressions: (FIXED_NUMBER() + 1) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT FIXED_NUMBER() + 1 FROM src LIMIT 1 @@ -115,7 +126,25 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED SELECT FIXED_NUMBER() + 1 FROM src LIMIT 1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_FUNCTION FIXED_NUMBER) 1))) (TOK_LIMIT 1))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + + + TOK_FUNCTION + FIXED_NUMBER + 1 + TOK_LIMIT + 1 + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -127,19 +156,15 @@ STAGE PLANS: Processor Tree: TableScan alias: src - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: (FIXED_NUMBER() + 1) - type: int + expressions: (FIXED_NUMBER() + 1) (type: int) outputColumnNames: _col0 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: DROP TEMPORARY MACRO FIXED_NUMBER @@ -184,9 +209,6 @@ PREHOOK: query: EXPLAIN SELECT SIMPLE_ADD(1, 9) FROM src LIMIT 1 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT SIMPLE_ADD(1, 9) FROM src LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SIMPLE_ADD 1 9))) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -197,12 +219,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: SIMPLE_ADD(1, 9) - type: int + expressions: SIMPLE_ADD(1, 9) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT SIMPLE_ADD(1, 9) FROM src LIMIT 1 @@ -210,7 +234,25 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED SELECT SIMPLE_ADD(1, 9) FROM src LIMIT 1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SIMPLE_ADD 1 9))) (TOK_LIMIT 1))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + SIMPLE_ADD + 1 + 9 + TOK_LIMIT + 1 + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -222,19 +264,15 @@ STAGE PLANS: Processor Tree: TableScan alias: src - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: SIMPLE_ADD(1, 9) - type: int + expressions: SIMPLE_ADD(1, 9) (type: int) outputColumnNames: _col0 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: DROP TEMPORARY MACRO SIMPLE_ADD diff --git ql/src/test/results/clientpositive/mapjoin1.q.out ql/src/test/results/clientpositive/mapjoin1.q.out index dd85635..76d814e 100644 --- ql/src/test/results/clientpositive/mapjoin1.q.out +++ ql/src/test/results/clientpositive/mapjoin1.q.out @@ -29,9 +29,6 @@ POSTHOOK: query: -- const filter on outer join EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND true limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) true))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -39,10 +36,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 @@ -52,27 +49,21 @@ STAGE PLANS: filter predicates: 0 1 {true} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2060 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2060 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -87,6 +78,7 @@ STAGE PLANS: a TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -118,9 +110,6 @@ POSTHOOK: query: -- func filter on outer join EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND b.key * 10 < '1000' limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (< (* (. (TOK_TABLE_OR_COL b) key) 10) '1000')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -128,10 +117,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 @@ -141,27 +130,21 @@ STAGE PLANS: filter predicates: 0 1 {((key * 10) < '1000')} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2060 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2060 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -176,6 +159,7 @@ STAGE PLANS: a TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -209,9 +193,6 @@ EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN (select key, named_struct('key', key, 'value', value) as kv from src) b on a.key=b.key AND b.kv.key > 200 limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION named_struct 'key' (TOK_TABLE_OR_COL key) 'value' (TOK_TABLE_OR_COL value)) kv)))) b) (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (> (. (. (TOK_TABLE_OR_COL b) kv) key) 200)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -219,17 +200,14 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: named_struct('key',key,'value',value) - type: struct + expressions: key (type: string), named_struct('key',key,'value',value) (type: struct) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 @@ -239,27 +217,21 @@ STAGE PLANS: filter predicates: 0 1 {(_col1.key > 200)} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[_col0]] + 0 key (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: struct + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2060 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2060 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -274,6 +246,7 @@ STAGE PLANS: a TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -305,9 +278,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND true limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) true))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -324,57 +294,48 @@ STAGE PLANS: a TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: true - type: boolean + predicate: true (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE HashTable Sink Operator condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 1 + 0 key (type: string) + 1 key (type: string) Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: true - type: boolean + predicate: true (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -410,9 +371,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND b.key * 10 < '1000' limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (< (* (. (TOK_TABLE_OR_COL b) key) 10) '1000')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -429,57 +387,48 @@ STAGE PLANS: a TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key * 10) < '1000') - type: boolean + predicate: ((key * 10) < '1000') (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 1 + 0 key (type: string) + 1 key (type: string) Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key * 10) < '1000') - type: boolean + predicate: ((key * 10) < '1000') (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 + Statistics: Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -517,9 +466,6 @@ POSTHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN (select key, named_struct('key', key, 'value', value) as kv from src) b on a.key=b.key AND b.kv.key > 200 limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION named_struct 'key' (TOK_TABLE_OR_COL key) 'value' (TOK_TABLE_OR_COL value)) kv)))) b) (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (> (. (. (TOK_TABLE_OR_COL b) kv) key) 200)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -527,48 +473,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: named_struct('key',key,'value',value) - type: struct + expressions: key (type: string), named_struct('key',key,'value',value) (type: struct) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col1.key > 200) - type: boolean + predicate: (_col1.key > 200) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 condition expressions: 0 {key} {value} 1 {_col0} {_col1} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[_col0]] + 0 key (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: struct + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2060 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2060 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -583,6 +519,7 @@ STAGE PLANS: a TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/mapjoin_distinct.q.out ql/src/test/results/clientpositive/mapjoin_distinct.q.out index 7bc5b9d..434acc4 100644 --- ql/src/test/results/clientpositive/mapjoin_distinct.q.out +++ ql/src/test/results/clientpositive/mapjoin_distinct.q.out @@ -10,9 +10,6 @@ JOIN srcpart d ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') SELECT /*+ MAPJOIN(d) */ DISTINCT c.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-3 depends on stages: Stage-1 @@ -21,43 +18,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col1 - Position of Big Table: 0 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col1 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col1 - type: string + keys: _col1 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -68,17 +57,15 @@ STAGE PLANS: d TableScan alias: d + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -86,34 +73,26 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6342 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6342 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 63 Data size: 6342 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -163,9 +142,6 @@ JOIN srcpart d ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') SELECT /*+ MAPJOIN(d) */ DISTINCT c.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -173,43 +149,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col1 - Position of Big Table: 0 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col1 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col1 - type: string + keys: _col1 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -220,22 +188,20 @@ STAGE PLANS: d TableScan alias: d + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6342 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6342 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 63 Data size: 6342 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -285,9 +251,6 @@ JOIN srcpart d ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') SELECT /*+ MAPJOIN(d) */ DISTINCT c.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-3 depends on stages: Stage-1 @@ -296,36 +259,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col1 - Position of Big Table: 0 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col1 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -336,17 +293,15 @@ STAGE PLANS: d TableScan alias: d + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -354,34 +309,26 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6342 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6342 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 63 Data size: 6342 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -431,9 +378,6 @@ JOIN srcpart d ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') SELECT /*+ MAPJOIN(d) */ DISTINCT c.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -441,36 +385,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col1 - Position of Big Table: 0 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col1 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -481,22 +419,20 @@ STAGE PLANS: d TableScan alias: d + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6342 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6342 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 63 Data size: 6342 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out index c09ad12..450013b 100644 --- ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out +++ ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out @@ -52,9 +52,6 @@ SELECT /*+ mapjoin(src1, src2) */ * FROM src1 JOIN src src3 ON (src2.key = src3.key AND src3.key < 300) SORT BY src1.key, src2.key, src3.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1)) (TOK_TABREF (TOK_TABNAME src1) src2) (AND (AND (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)) (< (. (TOK_TABLE_OR_COL src1) key) 10)) (> (. (TOK_TABLE_OR_COL src2) key) 10))) (TOK_TABREF (TOK_TABNAME src) src3) (AND (= (. (TOK_TABLE_OR_COL src2) key) (. (TOK_TABLE_OR_COL src3) key)) (< (. (TOK_TABLE_OR_COL src3) key) 300)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src1 src2))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src3) key))))) - STAGE DEPENDENCIES: Stage-4 is a root stage Stage-1 depends on stages: Stage-4 @@ -74,10 +71,10 @@ STAGE PLANS: src1 TableScan alias: src1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 300) and (key < 10)) - type: boolean + predicate: ((key < 300) and (key < 10)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE HashTable Sink Operator condition expressions: 0 {key} {value} @@ -87,19 +84,17 @@ STAGE PLANS: 0 1 {(key > 10)} 2 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - Position of Big Table: 2 + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) src2 TableScan alias: src2 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 300) - type: boolean + predicate: (key < 300) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE HashTable Sink Operator condition expressions: 0 {key} {value} @@ -109,23 +104,20 @@ STAGE PLANS: 0 1 {(key > 10)} 2 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - Position of Big Table: 2 + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src3 + Map Operator Tree: TableScan alias: src3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 300) - type: boolean + predicate: (key < 300) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 @@ -138,58 +130,29 @@ STAGE PLANS: 0 1 {(key > 10)} 2 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 + Statistics: Num rows: 19 Data size: 3966 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 19 Data size: 3966 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col2 - type: string - expr: _col4 - type: string + key expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + Statistics: Num rows: 19 Data size: 3966 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Local Work: Map Reduce Local Work Reduce Operator Tree: Extract + Statistics: Num rows: 19 Data size: 3966 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 19 Data size: 3966 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -249,9 +212,6 @@ SELECT * FROM src1 JOIN src src3 ON (src2.key = src3.key AND src3.key < 300) SORT BY src1.key, src2.key, src3.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1)) (TOK_TABREF (TOK_TABNAME src1) src2) (AND (AND (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)) (< (. (TOK_TABLE_OR_COL src1) key) 10)) (> (. (TOK_TABLE_OR_COL src2) key) 10))) (TOK_TABREF (TOK_TABNAME src) src3) (AND (= (. (TOK_TABLE_OR_COL src2) key) (. (TOK_TABLE_OR_COL src3) key)) (< (. (TOK_TABLE_OR_COL src3) key) 300)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src3) key))))) - STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 @@ -271,10 +231,10 @@ STAGE PLANS: src1 TableScan alias: src1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 300) and (key < 10)) - type: boolean + predicate: ((key < 300) and (key < 10)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE HashTable Sink Operator condition expressions: 0 {key} {value} @@ -284,19 +244,17 @@ STAGE PLANS: 0 1 {(key > 10)} 2 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - Position of Big Table: 2 + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) src2 TableScan alias: src2 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 300) - type: boolean + predicate: (key < 300) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE HashTable Sink Operator condition expressions: 0 {key} {value} @@ -306,23 +264,20 @@ STAGE PLANS: 0 1 {(key > 10)} 2 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - Position of Big Table: 2 + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src3 + Map Operator Tree: TableScan alias: src3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 300) - type: boolean + predicate: (key < 300) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 @@ -335,58 +290,29 @@ STAGE PLANS: 0 1 {(key > 10)} 2 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 + Statistics: Num rows: 19 Data size: 3966 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 19 Data size: 3966 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col2 - type: string - expr: _col4 - type: string + key expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + Statistics: Num rows: 19 Data size: 3966 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Local Work: Map Reduce Local Work Reduce Operator Tree: Extract + Statistics: Num rows: 19 Data size: 3966 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 19 Data size: 3966 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out index a2bd45a..2d2501d 100644 --- ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out +++ ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out @@ -6,9 +6,6 @@ POSTHOOK: query: -- Since the inputs are small, it should be automatically conve explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart)) (TOK_TABREF (TOK_TABNAME src)) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) key))))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-0 is a root stage @@ -16,42 +13,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[value]] - 1 [Column[value]] + 0 value (type: string) + 1 value (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 127 Data size: 25572 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] + 0 _col0 (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 139 Data size: 28129 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 139 Data size: 28129 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 139 Data size: 28129 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -69,9 +63,11 @@ STAGE PLANS: src TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE src1 TableScan alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -81,9 +77,6 @@ PREHOOK: query: explain select count(*) from srcpart join src on (srcpart.value= PREHOOK: type: QUERY POSTHOOK: query: explain select count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart)) (TOK_TABREF (TOK_TABNAME src)) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds)))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 is a root stage @@ -91,60 +84,48 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {ds} 1 - handleSkewJoin: false keys: - 0 [Column[value]] - 1 [Column[value]] + 0 value (type: string) + 1 value (type: string) outputColumnNames: _col0, _col2 - Position of Big Table: 0 + Statistics: Num rows: 127 Data size: 25572 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col2} 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] + 0 _col0 (type: string) + 1 key (type: string) outputColumnNames: _col2 - Position of Big Table: 0 + Statistics: Num rows: 139 Data size: 28129 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string + expressions: _col2 (type: string) outputColumnNames: _col2 + Statistics: Num rows: 139 Data size: 28129 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col2 - type: string + aggregations: count() + keys: _col2 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 139 Data size: 28129 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 139 Data size: 28129 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -158,27 +139,25 @@ STAGE PLANS: src TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE src1 TableScan alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 13963 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: bigint + expressions: _col1 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 69 Data size: 13963 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 69 Data size: 13963 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/mapjoin_subquery.q.out ql/src/test/results/clientpositive/mapjoin_subquery.q.out index e3a8fca..d102cc9 100644 --- ql/src/test/results/clientpositive/mapjoin_subquery.q.out +++ ql/src/test/results/clientpositive/mapjoin_subquery.q.out @@ -16,9 +16,6 @@ FROM FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value))))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-0 is a root stage @@ -26,49 +23,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - subq:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} 1 {value} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] + 0 _col0 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col5 - Position of Big Table: 0 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -86,9 +77,11 @@ STAGE PLANS: subq:x TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE z TableScan alias: z + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -241,9 +234,6 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) order by subq.key1, z.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq) key1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL z) value))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 is a root stage @@ -251,59 +241,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - subq:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} 1 {value} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] + 0 _col0 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col5 - Position of Big Table: 0 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -317,14 +293,17 @@ STAGE PLANS: subq:x TableScan alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE z TableScan alias: z + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Extract + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/mapjoin_subquery2.q.out ql/src/test/results/clientpositive/mapjoin_subquery2.q.out index 70e058c..82acdb6 100644 --- ql/src/test/results/clientpositive/mapjoin_subquery2.q.out +++ ql/src/test/results/clientpositive/mapjoin_subquery2.q.out @@ -67,9 +67,6 @@ FROM FROM y JOIN x ON (x.id = y.id)) subq JOIN z ON (subq.key1 = z.id) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME y)) (TOK_TABREF (TOK_TABNAME x)) (= (. (TOK_TABLE_OR_COL x) id) (. (TOK_TABLE_OR_COL y) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) id) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) name) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) id) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) name) value2)))) subq) (TOK_TABREF (TOK_TABNAME z)) (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) value1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) value2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) id)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) name))))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-0 is a root stage @@ -77,63 +74,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - subq:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 0 Data size: 13 Basic stats: PARTIAL Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {id} {name} 1 {name} {id} - handleSkewJoin: false keys: - 0 [Column[id]] - 1 [Column[id]] + 0 id (type: int) + 1 id (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 + Statistics: Num rows: 0 Data size: 6 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: _col5 - type: int - expr: _col4 - type: string - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col5 (type: int), _col4 (type: string), _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 6 Basic stats: PARTIAL Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} 1 {id} {name} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[id]] + 0 _col0 (type: int) + 1 id (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Position of Big Table: 0 + Statistics: Num rows: 0 Data size: 6 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 6 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 6 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -151,9 +128,11 @@ STAGE PLANS: subq:y TableScan alias: y + Statistics: Num rows: 0 Data size: 6 Basic stats: PARTIAL Column stats: NONE z TableScan alias: z + Statistics: Num rows: 0 Data size: 6 Basic stats: PARTIAL Column stats: NONE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/mapjoin_test_outer.q.out ql/src/test/results/clientpositive/mapjoin_test_outer.q.out index 525650e..36deabb 100644 --- ql/src/test/results/clientpositive/mapjoin_test_outer.q.out +++ ql/src/test/results/clientpositive/mapjoin_test_outer.q.out @@ -259,9 +259,6 @@ POSTHOOK: Lineage: dest_1.value SIMPLE [(src1)src1.FieldSchema(name:value, type: POSTHOOK: Lineage: dest_1.value SIMPLE [] POSTHOOK: Lineage: dest_2.key SIMPLE [(dest_1)dest_1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest_2.value SIMPLE [(dest_1)dest_1.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1)) (TOK_TABREF (TOK_TABNAME dest_1) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key))) (TOK_TABREF (TOK_TABNAME dest_2) src3) (= (. (TOK_TABLE_OR_COL src2) key) (. (TOK_TABLE_OR_COL src3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src1 src2))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src3) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src3) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -269,10 +266,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src3 + Map Operator Tree: TableScan alias: src3 + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 @@ -281,57 +278,21 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 + Statistics: Num rows: 19 Data size: 88 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 19 Data size: 88 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + Statistics: Num rows: 19 Data size: 88 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -345,14 +306,17 @@ STAGE PLANS: src1 TableScan alias: src1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE src2 TableScan alias: src2 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Extract + Statistics: Num rows: 19 Data size: 88 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 19 Data size: 88 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1133,9 +1097,6 @@ POSTHOOK: Lineage: dest_1.value SIMPLE [(src1)src1.FieldSchema(name:value, type: POSTHOOK: Lineage: dest_1.value SIMPLE [] POSTHOOK: Lineage: dest_2.key SIMPLE [(dest_1)dest_1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest_2.value SIMPLE [(dest_1)dest_1.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1)) (TOK_TABREF (TOK_TABNAME dest_1) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key))) (TOK_TABREF (TOK_TABNAME dest_2) src3) (= (. (TOK_TABLE_OR_COL src2) key) (. (TOK_TABLE_OR_COL src3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src3) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src3) value))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 is a root stage @@ -1143,10 +1104,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src2 + Map Operator Tree: TableScan alias: src2 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 @@ -1155,57 +1116,21 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 1 + Statistics: Num rows: 19 Data size: 88 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 19 Data size: 88 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + Statistics: Num rows: 19 Data size: 88 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -1219,14 +1144,17 @@ STAGE PLANS: src1 TableScan alias: src1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE src3 TableScan alias: src3 + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Extract + Statistics: Num rows: 19 Data size: 88 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 19 Data size: 88 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/mapreduce1.q.out ql/src/test/results/clientpositive/mapreduce1.q.out index 0ac3982..3fd5ffc 100644 --- ql/src/test/results/clientpositive/mapreduce1.q.out +++ ql/src/test/results/clientpositive/mapreduce1.q.out @@ -19,9 +19,6 @@ USING 'cat' AS (tkey, ten, one, tvalue) DISTRIBUTE BY tvalue, tkey SORT BY ten, one POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (TOK_FUNCTION TOK_INT (/ (. (TOK_TABLE_OR_COL src) key) 10)) (TOK_FUNCTION TOK_INT (% (. (TOK_TABLE_OR_COL src) key) 10)) (. (TOK_TABLE_OR_COL src) value)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST tkey ten one tvalue)))) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL tvalue) (TOK_TABLE_OR_COL tkey)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ten)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL one))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -30,65 +27,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: UDFToInteger((key / 10)) - type: int - expr: UDFToInteger((key % 10)) - type: int - expr: value - type: string + expressions: key (type: string), UDFToInteger((key / 10)) (type: int), UDFToInteger((key % 10)) (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col1 (type: string), _col2 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col3 - type: string - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + Map-reduce partition columns: _col3 (type: string), _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int - expr: _col3 - type: string + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/mapreduce2.q.out ql/src/test/results/clientpositive/mapreduce2.q.out index 40e2ef7..01ed783 100644 --- ql/src/test/results/clientpositive/mapreduce2.q.out +++ ql/src/test/results/clientpositive/mapreduce2.q.out @@ -17,9 +17,6 @@ MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value USING 'cat' AS (tkey, ten, one, tvalue) DISTRIBUTE BY tvalue, tkey POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (TOK_FUNCTION TOK_INT (/ (. (TOK_TABLE_OR_COL src) key) 10)) (TOK_FUNCTION TOK_INT (% (. (TOK_TABLE_OR_COL src) key) 10)) (. (TOK_TABLE_OR_COL src) value)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST tkey ten one tvalue)))) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL tvalue) (TOK_TABLE_OR_COL tkey)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -28,60 +25,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: UDFToInteger((key / 10)) - type: int - expr: UDFToInteger((key % 10)) - type: int - expr: value - type: string + expressions: key (type: string), UDFToInteger((key / 10)) (type: int), UDFToInteger((key % 10)) (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: _col3 - type: string - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + Map-reduce partition columns: _col3 (type: string), _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int - expr: _col3 - type: string + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/mapreduce3.q.out ql/src/test/results/clientpositive/mapreduce3.q.out index d380473..7724e01 100644 --- ql/src/test/results/clientpositive/mapreduce3.q.out +++ ql/src/test/results/clientpositive/mapreduce3.q.out @@ -17,9 +17,6 @@ MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value USING 'cat' AS (tkey, ten, one, tvalue) SORT BY tvalue, tkey POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (TOK_FUNCTION TOK_INT (/ (. (TOK_TABLE_OR_COL src) key) 10)) (TOK_FUNCTION TOK_INT (% (. (TOK_TABLE_OR_COL src) key) 10)) (. (TOK_TABLE_OR_COL src) value)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST tkey ten one tvalue)))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL tvalue)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL tkey))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -28,60 +25,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: UDFToInteger((key / 10)) - type: int - expr: UDFToInteger((key % 10)) - type: int - expr: value - type: string + expressions: key (type: string), UDFToInteger((key / 10)) (type: int), UDFToInteger((key % 10)) (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col3 - type: string - expr: _col0 - type: string + key expressions: _col3 (type: string), _col0 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int - expr: _col3 - type: string + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/mapreduce4.q.out ql/src/test/results/clientpositive/mapreduce4.q.out index f9ff7a1..dea234d 100644 --- ql/src/test/results/clientpositive/mapreduce4.q.out +++ ql/src/test/results/clientpositive/mapreduce4.q.out @@ -19,9 +19,6 @@ USING 'cat' AS (tkey, ten, one, tvalue) DISTRIBUTE BY tvalue, tkey SORT BY ten DESC, one ASC POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (TOK_FUNCTION TOK_INT (/ (. (TOK_TABLE_OR_COL src) key) 10)) (TOK_FUNCTION TOK_INT (% (. (TOK_TABLE_OR_COL src) key) 10)) (. (TOK_TABLE_OR_COL src) value)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST tkey ten one tvalue)))) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL tvalue) (TOK_TABLE_OR_COL tkey)) (TOK_SORTBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL ten)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL one))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -30,65 +27,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: UDFToInteger((key / 10)) - type: int - expr: UDFToInteger((key % 10)) - type: int - expr: value - type: string + expressions: key (type: string), UDFToInteger((key / 10)) (type: int), UDFToInteger((key % 10)) (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col1 (type: string), _col2 (type: string) sort order: -+ - Map-reduce partition columns: - expr: _col3 - type: string - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + Map-reduce partition columns: _col3 (type: string), _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int - expr: _col3 - type: string + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/mapreduce5.q.out ql/src/test/results/clientpositive/mapreduce5.q.out index 91fbcfe..074d22c 100644 --- ql/src/test/results/clientpositive/mapreduce5.q.out +++ ql/src/test/results/clientpositive/mapreduce5.q.out @@ -17,9 +17,6 @@ SELECT src.key as c1, CAST(src.key / 10 AS INT) as c2, CAST(src.key % 10 AS INT) DISTRIBUTE BY c4, c1 SORT BY c2 DESC, c3 ASC POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c1) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (/ (. (TOK_TABLE_OR_COL src) key) 10)) c2) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (% (. (TOK_TABLE_OR_COL src) key) 10)) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c4)) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL c4) (TOK_TABLE_OR_COL c1)) (TOK_SORTBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL c2)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL c3))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -28,59 +25,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: UDFToInteger((key / 10)) - type: int - expr: UDFToInteger((key % 10)) - type: int - expr: value - type: string + expressions: key (type: string), UDFToInteger((key / 10)) (type: int), UDFToInteger((key % 10)) (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: int - expr: _col2 - type: int + key expressions: _col1 (type: int), _col2 (type: int) sort order: -+ - Map-reduce partition columns: - expr: _col3 - type: string - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: int - expr: _col3 - type: string + Map-reduce partition columns: _col3 (type: string), _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: int - expr: _col2 - type: int - expr: _col3 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/mapreduce6.q.out ql/src/test/results/clientpositive/mapreduce6.q.out index 37fab6d..d7edf87 100644 --- ql/src/test/results/clientpositive/mapreduce6.q.out +++ ql/src/test/results/clientpositive/mapreduce6.q.out @@ -17,9 +17,6 @@ SELECT src.key, CAST(src.key / 10 AS INT) as c2, CAST(src.key % 10 AS INT) as c3 DISTRIBUTE BY value, key SORT BY c2 DESC, c3 ASC POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (/ (. (TOK_TABLE_OR_COL src) key) 10)) c2) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (% (. (TOK_TABLE_OR_COL src) key) 10)) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL c2)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL c3))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -28,59 +25,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: UDFToInteger((key / 10)) - type: int - expr: UDFToInteger((key % 10)) - type: int - expr: value - type: string + expressions: key (type: string), UDFToInteger((key / 10)) (type: int), UDFToInteger((key % 10)) (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: int - expr: _col2 - type: int + key expressions: _col1 (type: int), _col2 (type: int) sort order: -+ - Map-reduce partition columns: - expr: _col3 - type: string - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int - expr: _col2 - type: int - expr: _col3 - type: string + Map-reduce partition columns: _col3 (type: string), _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: int - expr: _col2 - type: int - expr: _col3 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/mapreduce7.q.out ql/src/test/results/clientpositive/mapreduce7.q.out index 4025204..785eeb4 100644 --- ql/src/test/results/clientpositive/mapreduce7.q.out +++ ql/src/test/results/clientpositive/mapreduce7.q.out @@ -17,9 +17,6 @@ MAP src.*, src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.va USING 'cat' AS (k, v, tkey, ten, one, tvalue) SORT BY tvalue, tkey POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_ALLCOLREF (TOK_TABNAME src)) (. (TOK_TABLE_OR_COL src) key) (TOK_FUNCTION TOK_INT (/ (. (TOK_TABLE_OR_COL src) key) 10)) (TOK_FUNCTION TOK_INT (% (. (TOK_TABLE_OR_COL src) key) 10)) (. (TOK_TABLE_OR_COL src) value)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST k v tkey ten one tvalue)))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL tvalue)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL tkey))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -28,72 +25,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: key - type: string - expr: UDFToInteger((key / 10)) - type: int - expr: UDFToInteger((key % 10)) - type: int - expr: value - type: string + expressions: key (type: string), value (type: string), key (type: string), UDFToInteger((key / 10)) (type: int), UDFToInteger((key % 10)) (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col5 - type: string - expr: _col2 - type: string + key expressions: _col5 (type: string), _col2 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: UDFToInteger(_col2) - type: int - expr: UDFToInteger(_col3) - type: int - expr: UDFToInteger(_col4) - type: int - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/mapreduce8.q.out ql/src/test/results/clientpositive/mapreduce8.q.out index caaad29..bbccc85 100644 --- ql/src/test/results/clientpositive/mapreduce8.q.out +++ ql/src/test/results/clientpositive/mapreduce8.q.out @@ -19,9 +19,6 @@ USING 'cat' AS (k, v, tkey, ten, one, tvalue) DISTRIBUTE BY rand(3) SORT BY tvalue, tkey POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_ALLCOLREF (TOK_TABNAME src)) (. (TOK_TABLE_OR_COL src) key) (TOK_FUNCTION TOK_INT (/ (. (TOK_TABLE_OR_COL src) key) 10)) (TOK_FUNCTION TOK_INT (% (. (TOK_TABLE_OR_COL src) key) 10)) (. (TOK_TABLE_OR_COL src) value)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST k v tkey ten one tvalue)))) (TOK_DISTRIBUTEBY (TOK_FUNCTION rand 3)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL tvalue)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL tkey))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -30,75 +27,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: key - type: string - expr: UDFToInteger((key / 10)) - type: int - expr: UDFToInteger((key % 10)) - type: int - expr: value - type: string + expressions: key (type: string), value (type: string), key (type: string), UDFToInteger((key / 10)) (type: int), UDFToInteger((key % 10)) (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col5 - type: string - expr: _col2 - type: string + key expressions: _col5 (type: string), _col2 (type: string) sort order: ++ - Map-reduce partition columns: - expr: rand(3) - type: double - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + Map-reduce partition columns: rand(3) (type: double) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: UDFToInteger(_col2) - type: int - expr: UDFToInteger(_col3) - type: int - expr: UDFToInteger(_col4) - type: int - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/merge1.q.out ql/src/test/results/clientpositive/merge1.q.out index ceb98aa..48bbcf6 100644 --- ql/src/test/results/clientpositive/merge1.q.out +++ ql/src/test/results/clientpositive/merge1.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: explain insert overwrite table dest1 select key, count(1) from src group by key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -27,56 +24,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -107,12 +88,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -121,12 +100,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -532,9 +509,6 @@ POSTHOOK: Lineage: test_src PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(n POSTHOOK: Lineage: test_src PARTITION(ds=101).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_src PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_src PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -548,18 +522,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test_src + Map Operator Tree: TableScan alias: test_src + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -590,12 +563,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -604,12 +575,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -654,9 +623,6 @@ POSTHOOK: Lineage: test_src PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(n POSTHOOK: Lineage: test_src PARTITION(ds=101).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_src PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_src PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -670,18 +636,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test_src + Map Operator Tree: TableScan alias: test_src + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -712,12 +677,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -726,12 +689,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/merge2.q.out ql/src/test/results/clientpositive/merge2.q.out index 9856890..3c66505 100644 --- ql/src/test/results/clientpositive/merge2.q.out +++ ql/src/test/results/clientpositive/merge2.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: explain insert overwrite table test1 select key, count(1) from src group by key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -27,56 +24,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -107,12 +88,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -121,12 +100,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -532,9 +509,6 @@ POSTHOOK: Lineage: test_src PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(n POSTHOOK: Lineage: test_src PARTITION(ds=101).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_src PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_src PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -548,18 +522,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test_src + Map Operator Tree: TableScan alias: test_src + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -590,12 +563,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -604,12 +575,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -654,9 +623,6 @@ POSTHOOK: Lineage: test_src PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(n POSTHOOK: Lineage: test_src PARTITION(ds=101).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_src PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_src PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -670,18 +636,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test_src + Map Operator Tree: TableScan alias: test_src + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -712,12 +677,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -726,12 +689,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/merge3.q.out ql/src/test/results/clientpositive/merge3.q.out index 6583b35..3df75b7 100644 --- ql/src/test/results/clientpositive/merge3.q.out +++ ql/src/test/results/clientpositive/merge3.q.out @@ -54,7 +54,28 @@ POSTHOOK: Lineage: merge_src_part PARTITION(ds=2008-04-08).value SIMPLE [(srcpar POSTHOOK: Lineage: merge_src_part PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: merge_src_part PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME merge_src2) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME merge_src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) + +TOK_CREATETABLE + TOK_TABNAME + merge_src2 + TOK_LIKETABLE + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + merge_src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -70,29 +91,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - merge_src + Map Operator Tree: TableScan alias: merge_src - Statistics: - numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -175,12 +188,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: key string, value string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: merge_src2 - isExternal: false Stage: Stage-2 Stats-Aggr Operator @@ -188,8 +198,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -243,8 +252,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -2389,7 +2397,36 @@ POSTHOOK: Lineage: merge_src_part PARTITION(ds=2008-04-08).value SIMPLE [(srcpar POSTHOOK: Lineage: merge_src_part PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: merge_src_part PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME merge_src_part))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME merge_src_part2) (TOK_PARTSPEC (TOK_PARTVAL ds)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL ds))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + merge_src_part + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + merge_src_part2 + TOK_PARTSPEC + TOK_PARTVAL + ds + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + TOK_FUNCTION + TOK_ISNOTNULL + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2404,31 +2441,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - merge_src_part + Map Operator Tree: TableScan alias: merge_src_part - Statistics: - numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string + expressions: key (type: string), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2577,8 +2604,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -2647,8 +2673,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -4828,7 +4853,50 @@ POSTHOOK: Lineage: merge_src_part2 PARTITION(ds=2008-04-08).value SIMPLE [(merge POSTHOOK: Lineage: merge_src_part2 PARTITION(ds=2008-04-09).key SIMPLE [(merge_src_part)merge_src_part.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: merge_src_part2 PARTITION(ds=2008-04-09).value SIMPLE [(merge_src_part)merge_src_part.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME merge_src_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL ds))) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL ds)))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME merge_src_part2) (TOK_PARTSPEC (TOK_PARTVAL ds)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + merge_src_part + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + TOK_FUNCTION + TOK_ISNOTNULL + TOK_TABLE_OR_COL + ds + TOK_DISTRIBUTEBY + TOK_TABLE_OR_COL + ds + s + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + merge_src_part2 + TOK_PARTSPEC + TOK_PARTVAL + ds + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -4843,39 +4911,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s:merge_src_part + Map Operator Tree: TableScan alias: merge_src_part - Statistics: - numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string + expressions: key (type: string), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: _col2 - type: string - Statistics: - numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4967,26 +5017,17 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -5047,8 +5088,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -5117,8 +5157,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/merge4.q.out ql/src/test/results/clientpositive/merge4.q.out index 7487846..ef4beba 100644 --- ql/src/test/results/clientpositive/merge4.q.out +++ ql/src/test/results/clientpositive/merge4.q.out @@ -9,9 +9,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table nzhang_part partition (ds='2010-08-15', hr) select key, value, hr from srcpart where ds='2008-04-08' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part) (TOK_PARTSPEC (TOK_PARTVAL ds '2010-08-15') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2008-04-08')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -25,22 +22,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -74,12 +66,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -88,12 +78,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1149,9 +1137,6 @@ POSTHOOK: Lineage: nzhang_part PARTITION(ds=2010-08-15,hr=11).key SIMPLE [(srcpa POSTHOOK: Lineage: nzhang_part PARTITION(ds=2010-08-15,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: nzhang_part PARTITION(ds=2010-08-15,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: nzhang_part PARTITION(ds=2010-08-15,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part) (TOK_PARTSPEC (TOK_PARTVAL ds '2010-08-15') (TOK_PARTVAL hr 11)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2008-04-08')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -1165,20 +1150,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1212,12 +1194,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1226,12 +1206,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2800,9 +2778,6 @@ POSTHOOK: Lineage: nzhang_part PARTITION(ds=2010-08-15,hr=11).key SIMPLE [(srcpa POSTHOOK: Lineage: nzhang_part PARTITION(ds=2010-08-15,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: nzhang_part PARTITION(ds=2010-08-15,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: nzhang_part PARTITION(ds=2010-08-15,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2008-04-08')))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR '1' key) (TOK_SELEXPR '1' value) (TOK_SELEXPR 'file,' hr)) (TOK_LIMIT 1)))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part) (TOK_PARTSPEC (TOK_PARTVAL ds '2010-08-15') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-9 is a root stage Stage-2 depends on stages: Stage-9 @@ -2817,36 +2792,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:s-subquery2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: '1' - type: string - expr: '1' - type: string - expr: 'file,' - type: string + expressions: '1' (type: string), '1' (type: string), 'file,' (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2854,57 +2822,43 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - TableScan - Union - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part - null-subquery1:s-subquery1:srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 17436 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 58 Data size: 17436 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 58 Data size: 17436 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + TableScan + Union + Statistics: Num rows: 58 Data size: 17436 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 58 Data size: 17436 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 58 Data size: 17436 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part Stage: Stage-8 Conditional Operator @@ -2933,12 +2887,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2947,12 +2899,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/merge_dynamic_partition.q.out ql/src/test/results/clientpositive/merge_dynamic_partition.q.out index 1694ba1..28f6389 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition.q.out @@ -39,9 +39,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table merge_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart_merge_dp where ds='2008-04-08' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart_merge_dp))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME merge_dynamic_part) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2008-04-08')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -50,22 +47,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart_merge_dp + Map Operator Tree: TableScan alias: srcpart_merge_dp + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -639,9 +631,6 @@ insert overwrite table merge_dynamic_part partition (ds='2008-04-08', hr=11) sel POSTHOOK: type: QUERY POSTHOOK: Lineage: merge_dynamic_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: merge_dynamic_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart_merge_dp))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME merge_dynamic_part) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr 11)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2008-04-08')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -655,20 +644,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart_merge_dp + Map Operator Tree: TableScan alias: srcpart_merge_dp + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -702,12 +688,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -716,12 +700,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1293,9 +1275,6 @@ POSTHOOK: Lineage: merge_dynamic_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE POSTHOOK: Lineage: merge_dynamic_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: merge_dynamic_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: merge_dynamic_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart_merge_dp))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME merge_dynamic_part) (TOK_PARTSPEC (TOK_PARTVAL ds) (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) 11))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -1309,24 +1288,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart_merge_dp + Map Operator Tree: TableScan alias: srcpart_merge_dp + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1360,12 +1332,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1374,12 +1344,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out index 91d94c1..90d155b 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out @@ -52,9 +52,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table merge_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart_merge_dp where ds='2008-04-08' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart_merge_dp))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME merge_dynamic_part) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2008-04-08')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -68,22 +65,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart_merge_dp + Map Operator Tree: TableScan alias: srcpart_merge_dp + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -117,12 +109,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -131,12 +121,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out index 1fe7205..cd44784 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out @@ -98,9 +98,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table merge_dynamic_part partition (ds, hr) select key, value, ds, hr from srcpart_merge_dp where ds>='2008-04-08' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart_merge_dp))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME merge_dynamic_part) (TOK_PARTSPEC (TOK_PARTVAL ds) (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (>= (TOK_TABLE_OR_COL ds) '2008-04-08')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -114,24 +111,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart_merge_dp + Map Operator Tree: TableScan alias: srcpart_merge_dp + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -165,12 +155,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -179,12 +167,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out index ac64255..484f964 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out @@ -116,9 +116,6 @@ POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).key SIMPLE POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart_merge_dp_rc))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME merge_dynamic_part) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION if (== (% (TOK_TABLE_OR_COL key) 2) 0) 'a1' 'b1') hr)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2008-04-08')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -132,22 +129,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart_merge_dp_rc + Map Operator Tree: TableScan alias: srcpart_merge_dp_rc + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: if(((key % 2) = 0), 'a1', 'b1') - type: string + expressions: key (type: string), value (type: string), if(((key % 2) = 0), 'a1', 'b1') (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat diff --git ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out index 71d6333..77a750d 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out @@ -96,9 +96,6 @@ POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).key SIMPLE POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart_merge_dp_rc))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME merge_dynamic_part) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION if (== (% (TOK_TABLE_OR_COL key) 100) 0) 'a1' 'b1') hr)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2008-04-08')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -112,22 +109,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart_merge_dp_rc + Map Operator Tree: TableScan alias: srcpart_merge_dp_rc + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: if(((key % 100) = 0), 'a1', 'b1') - type: string + expressions: key (type: string), value (type: string), if(((key % 100) = 0), 'a1', 'b1') (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat diff --git ql/src/test/results/clientpositive/mergejoins.q.out ql/src/test/results/clientpositive/mergejoins.q.out index cb709a8..4f6fc87 100644 --- ql/src/test/results/clientpositive/mergejoins.q.out +++ ql/src/test/results/clientpositive/mergejoins.q.out @@ -27,9 +27,6 @@ PREHOOK: query: explain select * from a join b on a.val1=b.val1 join c on a.val1 PREHOOK: type: QUERY POSTHOOK: query: explain select * from a join b on a.val1=b.val1 join c on a.val1=c.val1 join d on a.val1=d.val1 join e on a.val2=e.val2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME b)) (= (. (TOK_TABLE_OR_COL a) val1) (. (TOK_TABLE_OR_COL b) val1))) (TOK_TABREF (TOK_TABNAME c)) (= (. (TOK_TABLE_OR_COL a) val1) (. (TOK_TABLE_OR_COL c) val1))) (TOK_TABREF (TOK_TABNAME d)) (= (. (TOK_TABLE_OR_COL a) val1) (. (TOK_TABLE_OR_COL d) val1))) (TOK_TABREF (TOK_TABNAME e)) (= (. (TOK_TABLE_OR_COL a) val2) (. (TOK_TABLE_OR_COL e) val2)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -38,75 +35,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: val1 - type: int + key expressions: val1 (type: int) sort order: + - Map-reduce partition columns: - expr: val1 - type: int - tag: 0 - value expressions: - expr: val1 - type: int - expr: val2 - type: int - b + Map-reduce partition columns: val1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val1 (type: int), val2 (type: int) TableScan alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: val1 - type: int + key expressions: val1 (type: int) sort order: + - Map-reduce partition columns: - expr: val1 - type: int - tag: 1 - value expressions: - expr: val1 - type: int - expr: val2 - type: int - c + Map-reduce partition columns: val1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val1 (type: int), val2 (type: int) TableScan alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: val1 - type: int + key expressions: val1 (type: int) sort order: + - Map-reduce partition columns: - expr: val1 - type: int - tag: 2 - value expressions: - expr: val1 - type: int - expr: val2 - type: int - d + Map-reduce partition columns: val1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val1 (type: int), val2 (type: int) TableScan - alias: d + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: val1 - type: int + key expressions: val1 (type: int) sort order: + - Map-reduce partition columns: - expr: val1 - type: int - tag: 3 - value expressions: - expr: val1 - type: int - expr: val2 - type: int + Map-reduce partition columns: val1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val1 (type: int), val2 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -118,11 +83,10 @@ STAGE PLANS: 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} 3 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -130,52 +94,23 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: int + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: - expr: _col1 - type: int - tag: 0 - value expressions: - expr: _col12 - type: int - expr: _col13 - type: int - expr: _col4 - type: int - expr: _col5 - type: int - expr: _col8 - type: int - expr: _col9 - type: int - expr: _col0 - type: int - expr: _col1 - type: int - e + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col12 (type: int), _col13 (type: int), _col4 (type: int), _col5 (type: int), _col8 (type: int), _col9 (type: int), _col0 (type: int), _col1 (type: int) TableScan alias: e + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: val2 - type: int + key expressions: val2 (type: int) sort order: + - Map-reduce partition columns: - expr: val2 - type: int - tag: 1 - value expressions: - expr: val1 - type: int - expr: val2 - type: int + Map-reduce partition columns: val2 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val1 (type: int), val2 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -183,34 +118,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5} {VALUE._col8} {VALUE._col9} {VALUE._col12} {VALUE._col13} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13, _col16, _col17 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col12 - type: int - expr: _col13 - type: int - expr: _col4 - type: int - expr: _col5 - type: int - expr: _col8 - type: int - expr: _col9 - type: int - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col16 - type: int - expr: _col17 - type: int + expressions: _col12 (type: int), _col13 (type: int), _col4 (type: int), _col5 (type: int), _col8 (type: int), _col9 (type: int), _col0 (type: int), _col1 (type: int), _col16 (type: int), _col17 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -226,9 +142,6 @@ PREHOOK: type: QUERY POSTHOOK: query: --HIVE-3070 filter on outer join condition removed while merging join tree explain select * from src a join src b on a.key=b.key left outer join src c on b.key=c.key and b.key<10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME src) c) (and (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)) (< (. (TOK_TABLE_OR_COL b) key) 10)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -236,58 +149,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: c + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - c + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: c + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -301,26 +190,15 @@ STAGE PLANS: 0 1 {(VALUE._col0 < 10)} 2 - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Statistics: Num rows: 63 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 63 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 63 Data size: 12786 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/mergejoins_mixed.q.out ql/src/test/results/clientpositive/mergejoins_mixed.q.out index aa13bd7..4e16fbd 100644 --- ql/src/test/results/clientpositive/mergejoins_mixed.q.out +++ ql/src/test/results/clientpositive/mergejoins_mixed.q.out @@ -15,9 +15,6 @@ POSTHOOK: query: -- (a-b-c-d) explain select * from a join a b on (a.key=b.key) left outer join a c on (b.key=c.key) left outer join a d on (a.key=d.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME a) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME a) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -25,75 +22,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - c + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: value - type: string - d + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: d + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 3 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -105,30 +70,15 @@ STAGE PLANS: 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} 3 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col12 - type: string - expr: _col13 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string), _col9 (type: string), _col12 (type: string), _col13 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -144,9 +94,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from a join a b on (a.key=b.key) left outer join a c on (b.key=c.key) right outer join a d on (a.key=d.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME a) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME a) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -154,75 +101,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - c + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: value - type: string - d + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: d + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 3 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -234,30 +149,15 @@ STAGE PLANS: 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} 3 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col12 - type: string - expr: _col13 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string), _col9 (type: string), _col12 (type: string), _col13 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -273,9 +173,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.key=c.key) left outer join a d on (a.key=d.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME a) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME a) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -283,75 +180,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - c + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: value - type: string - d + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: d + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 3 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -363,30 +228,15 @@ STAGE PLANS: 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} 3 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col12 - type: string - expr: _col13 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string), _col9 (type: string), _col12 (type: string), _col13 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -402,9 +252,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.key=c.key) right outer join a d on (a.key=d.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME a) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME a) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -412,75 +259,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - c + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: value - type: string - d + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: d + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 3 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -492,30 +307,15 @@ STAGE PLANS: 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} 3 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col12 - type: string - expr: _col13 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string), _col9 (type: string), _col12 (type: string), _col13 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -533,9 +333,6 @@ POSTHOOK: query: -- ((a-b-d)-c) (reordered) explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) left outer join a d on (a.key=d.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME a) c) (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME a) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -544,58 +341,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - d + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: d + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -605,11 +378,10 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -617,48 +389,23 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col5 - type: string + key expressions: _col5 (type: string) sort order: + - Map-reduce partition columns: - expr: _col5 - type: string - tag: 0 - value expressions: - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - c + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col8 (type: string), _col9 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string) TableScan alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -666,30 +413,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5} {VALUE._col8} {VALUE._col9} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col12 - type: string - expr: _col13 - type: string - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col8 (type: string), _col9 (type: string), _col4 (type: string), _col5 (type: string), _col12 (type: string), _col13 (type: string), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -705,9 +437,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) right outer join a d on (a.key=d.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME a) c) (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME a) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -716,58 +445,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - d + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: d + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -777,11 +482,10 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -789,48 +493,23 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col5 - type: string + key expressions: _col5 (type: string) sort order: + - Map-reduce partition columns: - expr: _col5 - type: string - tag: 0 - value expressions: - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - c + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col8 (type: string), _col9 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string) TableScan alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -838,30 +517,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5} {VALUE._col8} {VALUE._col9} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col12 - type: string - expr: _col13 - type: string - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col8 (type: string), _col9 (type: string), _col4 (type: string), _col5 (type: string), _col12 (type: string), _col13 (type: string), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -877,9 +541,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from a join a b on (a.key=b.key) full outer join a c on (b.value=c.key) full outer join a d on (a.key=d.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_FULLOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME a) c) (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME a) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -888,58 +549,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - d + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: d + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -949,11 +586,10 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -961,48 +597,23 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col5 - type: string + key expressions: _col5 (type: string) sort order: + - Map-reduce partition columns: - expr: _col5 - type: string - tag: 0 - value expressions: - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - c + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col8 (type: string), _col9 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string) TableScan alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1010,30 +621,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5} {VALUE._col8} {VALUE._col9} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col12 - type: string - expr: _col13 - type: string - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col8 (type: string), _col9 (type: string), _col4 (type: string), _col5 (type: string), _col12 (type: string), _col13 (type: string), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1051,9 +647,6 @@ POSTHOOK: query: -- (((a-b)-c)-d) explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) right outer join a d on (a.key=d.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME a) c) (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME a) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -1063,41 +656,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1105,11 +682,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1117,44 +693,23 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col5 - type: string + key expressions: _col5 (type: string) sort order: + - Map-reduce partition columns: - expr: _col5 - type: string - tag: 0 - value expressions: - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - c + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string) TableScan alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1162,11 +717,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1174,48 +728,23 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col4 - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: _col4 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - d + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: d Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: _col4 (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col8 (type: string), _col9 (type: string), _col4 (type: string), _col5 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1223,30 +752,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5} {VALUE._col8} {VALUE._col9} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col12 - type: string - expr: _col13 - type: string + expressions: _col8 (type: string), _col9 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col12 (type: string), _col13 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1262,9 +776,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) full outer join a d on (a.key=d.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME a) c) (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME a) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -1274,41 +785,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1316,11 +811,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1328,44 +822,23 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col5 - type: string + key expressions: _col5 (type: string) sort order: + - Map-reduce partition columns: - expr: _col5 - type: string - tag: 0 - value expressions: - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - c + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string) TableScan alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1373,11 +846,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1385,48 +857,23 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col4 - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: _col4 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - d + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: d Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: _col4 (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col8 (type: string), _col9 (type: string), _col4 (type: string), _col5 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1434,30 +881,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5} {VALUE._col8} {VALUE._col9} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col12 - type: string - expr: _col13 - type: string + expressions: _col8 (type: string), _col9 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col12 (type: string), _col13 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1473,9 +905,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) left outer join a d on (a.key=d.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME a) c) (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME a) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -1485,41 +914,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1527,11 +940,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1539,44 +951,23 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col5 - type: string + key expressions: _col5 (type: string) sort order: + - Map-reduce partition columns: - expr: _col5 - type: string - tag: 0 - value expressions: - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - c + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string) TableScan alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1584,11 +975,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1596,48 +986,23 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col4 - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: _col4 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - d + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: d Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: _col4 (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col8 (type: string), _col9 (type: string), _col4 (type: string), _col5 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1645,30 +1010,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5} {VALUE._col8} {VALUE._col9} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col12 - type: string - expr: _col13 - type: string + expressions: _col8 (type: string), _col9 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col12 (type: string), _col13 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1684,9 +1034,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) full outer join a d on (a.key=d.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME a) c) (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME a) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -1696,41 +1043,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1738,11 +1069,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1750,44 +1080,23 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col5 - type: string + key expressions: _col5 (type: string) sort order: + - Map-reduce partition columns: - expr: _col5 - type: string - tag: 0 - value expressions: - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - c + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string) TableScan alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1795,11 +1104,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1807,48 +1115,23 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col4 - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: _col4 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - d + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: d Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: _col4 (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col8 (type: string), _col9 (type: string), _col4 (type: string), _col5 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1856,30 +1139,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5} {VALUE._col8} {VALUE._col9} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col12 - type: string - expr: _col13 - type: string + expressions: _col8 (type: string), _col9 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col12 (type: string), _col13 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1897,9 +1165,6 @@ POSTHOOK: query: -- ((a-b)-c-d) explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) left outer join a d on (c.key=d.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME a) c) (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME a) d) (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -1908,41 +1173,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1950,11 +1199,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1962,61 +1210,32 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col5 - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: _col5 - type: string - tag: 0 - value expressions: - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - c + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - d + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: d Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: _col5 (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -2026,30 +1245,15 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col12 - type: string - expr: _col13 - type: string + expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col8 (type: string), _col9 (type: string), _col12 (type: string), _col13 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/metadata_only_queries.q.out ql/src/test/results/clientpositive/metadata_only_queries.q.out index 03ca3ec..fea4a11 100644 --- ql/src/test/results/clientpositive/metadata_only_queries.q.out +++ ql/src/test/results/clientpositive/metadata_only_queries.q.out @@ -281,9 +281,6 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).s SIMPLE [(over10k)over10k. POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -291,106 +288,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - stats_tbl + Map Operator Tree: TableScan alias: stats_tbl + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: s - type: string - expr: bo - type: boolean - expr: bin - type: binary - expr: si - type: smallint - expr: i - type: int - expr: b - type: bigint + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) outputColumnNames: s, bo, bin, si, i, b + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - expr: sum(1) - expr: sum(0.2) - expr: count(1) - expr: count(s) - expr: count(bo) - expr: count(bin) - expr: count(si) - expr: max(i) - expr: min(b) - bucketGroup: false + aggregations: count(), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: double - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint - expr: _col8 - type: int - expr: _col9 - type: bigint + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: count(VALUE._col3) - expr: count(VALUE._col4) - expr: count(VALUE._col5) - expr: count(VALUE._col6) - expr: count(VALUE._col7) - expr: max(VALUE._col8) - expr: min(VALUE._col9) - bucketGroup: false + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: double - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint - expr: _col8 - type: int - expr: _col9 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -450,9 +377,6 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).s SIMPLE [(over10k)over10k. POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -460,106 +384,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - stats_tbl_part + Map Operator Tree: TableScan alias: stats_tbl_part + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: s - type: string - expr: bo - type: boolean - expr: bin - type: binary - expr: si - type: smallint - expr: i - type: int - expr: b - type: bigint + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) outputColumnNames: s, bo, bin, si, i, b + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - expr: sum(1) - expr: sum(0.2) - expr: count(1) - expr: count(s) - expr: count(bo) - expr: count(bin) - expr: count(si) - expr: max(i) - expr: min(b) - bucketGroup: false + aggregations: count(), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: double - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint - expr: _col8 - type: int - expr: _col9 - type: bigint + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: count(VALUE._col3) - expr: count(VALUE._col4) - expr: count(VALUE._col5) - expr: count(VALUE._col6) - expr: count(VALUE._col7) - expr: max(VALUE._col8) - expr: min(VALUE._col9) - bucketGroup: false + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: double - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint - expr: _col8 - type: int - expr: _col9 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -833,9 +687,6 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).s SIMPLE [(over10k)over10k. POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -945,9 +796,6 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).s SIMPLE [(over10k)over10k. POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1055,9 +903,6 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).s SIMPLE [(over10k)over10k. POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL ts)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1065,42 +910,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - stats_tbl_part + Map Operator Tree: TableScan alias: stats_tbl_part + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: ts - type: timestamp + expressions: ts (type: timestamp) outputColumnNames: ts + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(ts) - bucketGroup: false + aggregations: count(ts) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out index b16378a..01dbf3d 100644 --- ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out +++ ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out @@ -228,9 +228,6 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).s SIMPLE [(over10k)over10k. POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d)))) (TOK_WHERE (= (TOK_TABLE_OR_COL dt) 2010)))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -296,9 +293,6 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).s SIMPLE [(over10k)over10k. POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2014).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 2)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d)))) (TOK_WHERE (> (TOK_TABLE_OR_COL dt) 2010)))) - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/metadataonly1.q.out ql/src/test/results/clientpositive/metadataonly1.q.out index 960ef6b..fc6e9e5 100644 --- ql/src/test/results/clientpositive/metadataonly1.q.out +++ ql/src/test/results/clientpositive/metadataonly1.q.out @@ -8,7 +8,23 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select max(ds) from TEST1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME TEST1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL ds)))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + max + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -17,60 +33,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test1 - TableScan - alias: test1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE - GatherStats: false - Select Operator - expressions: - expr: ds - type: string - outputColumnNames: ds - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE - Group By Operator - aggregations: - expr: max(ds) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE - Reduce Output Operator - sort order: - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE - tag: -1 - value expressions: - expr: _col0 - type: string Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - bucketGroup: false + aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -112,7 +91,23 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select max(ds) from TEST1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME TEST1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL ds)))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + max + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -121,36 +116,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test1 + Map Operator Tree: TableScan alias: test1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: ds - type: string + expressions: ds (type: string) outputColumnNames: ds - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: max(ds) - bucketGroup: false + aggregations: max(ds) mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: string + value expressions: _col0 (type: string) Path -> Alias: -mr-10002default.test1{ds=1} [test1] Path -> Partition: @@ -195,27 +179,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - bucketGroup: false + aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -252,7 +229,23 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(distinct ds) from TEST1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME TEST1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL ds)))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONDI + count + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -261,42 +254,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test1 + Map Operator Tree: TableScan alias: test1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: ds - type: string + expressions: ds (type: string) outputColumnNames: ds - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(DISTINCT ds) - bucketGroup: false - keys: - expr: ds - type: string + aggregations: count(DISTINCT ds) + keys: ds (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col1 - type: bigint + value expressions: _col1 (type: bigint) Path -> Alias: -mr-10002default.test1{ds=1} [test1] Path -> Partition: @@ -341,27 +319,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0:0._col0) - bucketGroup: false + aggregations: count(DISTINCT KEY._col0:0._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -398,7 +369,23 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(ds) from TEST1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME TEST1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL ds)))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + count + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -407,36 +394,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test1 + Map Operator Tree: TableScan alias: test1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: ds - type: string + expressions: ds (type: string) outputColumnNames: ds - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(ds) - bucketGroup: false + aggregations: count(ds) mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -481,27 +457,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -547,7 +516,50 @@ POSTHOOK: query: explain extended select count(*) from TEST1 a2 join (select max(ds) m from TEST1) b on a2.ds=b.m POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME TEST1) a2) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME TEST1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL ds)) m)))) b) (= (. (TOK_TABLE_OR_COL a2) ds) (. (TOK_TABLE_OR_COL b) m)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + TEST1 + a2 + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + max + TOK_TABLE_OR_COL + ds + m + b + = + . + TOK_TABLE_OR_COL + a2 + ds + . + TOK_TABLE_OR_COL + b + m + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -558,36 +570,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b:test1 + Map Operator Tree: TableScan alias: test1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: ds - type: string + expressions: ds (type: string) outputColumnNames: ds - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: max(ds) - bucketGroup: false + aggregations: max(ds) mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: string + value expressions: _col0 (type: string) Path -> Alias: -mr-10004default.test1{ds=1} [b:test1] -mr-10005default.test1{ds=2} [b:test1] @@ -670,20 +671,14 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - bucketGroup: false + aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 @@ -704,37 +699,24 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 - a2 TableScan alias: a2 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Reduce Output Operator - key expressions: - expr: ds - type: string + key expressions: ds (type: string) sort order: + - Map-reduce partition columns: - expr: ds - type: string - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Map-reduce partition columns: ds (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE tag: 0 Path -> Alias: #### A masked pattern was here #### @@ -843,20 +825,14 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false - Statistics: - numRows: 1 dataSize: 92 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: - numRows: 1 dataSize: 92 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -877,18 +853,14 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -917,27 +889,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1002,7 +967,29 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select ds, count(distinct hr) from TEST2 group by ds POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME TEST2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL hr)))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_SELEXPR + TOK_FUNCTIONDI + count + TOK_TABLE_OR_COL + hr + TOK_GROUPBY + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1011,51 +998,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test2 + Map Operator Tree: TableScan alias: test2 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: ds - type: string - expr: hr - type: string + expressions: ds (type: string), hr (type: string) outputColumnNames: ds, hr - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(DISTINCT hr) - bucketGroup: false - keys: - expr: ds - type: string - expr: hr - type: string + aggregations: count(DISTINCT hr) + keys: ds (type: string), hr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col2 - type: bigint + value expressions: _col2 (type: bigint) Path -> Alias: -mr-10002default.test2{ds=1, hr=1} [test2] -mr-10003default.test2{ds=1, hr=2} [test2] @@ -1179,32 +1143,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1245,7 +1198,29 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select ds, count(hr) from TEST2 group by ds POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME TEST2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL hr)))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_SELEXPR + TOK_FUNCTION + count + TOK_TABLE_OR_COL + hr + TOK_GROUPBY + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1254,47 +1229,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test2 + Map Operator Tree: TableScan alias: test2 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: ds - type: string - expr: hr - type: string + expressions: ds (type: string), hr (type: string) outputColumnNames: ds, hr - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(hr) - bucketGroup: false - keys: - expr: ds - type: string + aggregations: count(hr) + keys: ds (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col1 - type: bigint + value expressions: _col1 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1416,32 +1372,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1481,7 +1426,23 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select max(ds) from TEST1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME TEST1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL ds)))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + max + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1490,36 +1451,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test1 + Map Operator Tree: TableScan alias: test1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: ds - type: string + expressions: ds (type: string) outputColumnNames: ds - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: max(ds) - bucketGroup: false + aggregations: max(ds) mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col0 - type: string + value expressions: _col0 (type: string) Path -> Alias: -mr-10002default.test1{ds=1} [test1] -mr-10003default.test1{ds=2} [test1] @@ -1602,27 +1552,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - bucketGroup: false + aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 84 basicStatsState: COMPLETE colStatsState: COMPLETE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1712,7 +1655,29 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select ds, count(distinct hr) from TEST2 group by ds POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME TEST2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL hr)))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + TEST2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_SELEXPR + TOK_FUNCTIONDI + count + TOK_TABLE_OR_COL + hr + TOK_GROUPBY + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1721,51 +1686,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test2 + Map Operator Tree: TableScan alias: test2 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: ds - type: string - expr: hr - type: string + expressions: ds (type: string), hr (type: string) outputColumnNames: ds, hr - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(DISTINCT hr) - bucketGroup: false - keys: - expr: ds - type: string - expr: hr - type: string + aggregations: count(DISTINCT hr) + keys: ds (type: string), hr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE tag: -1 - value expressions: - expr: _col2 - type: bigint + value expressions: _col2 (type: bigint) Path -> Alias: -mr-10002default.test2{ds=01_10_10, hr=01} [test2] -mr-10003default.test2{ds=01_10_20, hr=02} [test2] @@ -1967,32 +1909,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/multiMapJoin1.q.out ql/src/test/results/clientpositive/multiMapJoin1.q.out index 1899cd3..8d888a2 100644 --- ql/src/test/results/clientpositive/multiMapJoin1.q.out +++ ql/src/test/results/clientpositive/multiMapJoin1.q.out @@ -185,9 +185,6 @@ POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:str POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 is a root stage @@ -195,51 +192,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - firstjoin:bigtbl + Map Operator Tree: TableScan alias: bigtbl + Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col1 - Position of Big Table: 0 + Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col1 + Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col1]] - 1 [Column[value]] - Position of Big Table: 0 + 0 _col1 (type: string) + 1 value (type: string) + Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -253,24 +245,24 @@ STAGE PLANS: firstjoin:smalltbl1 TableScan alias: smalltbl1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE smalltbl2 TableScan alias: smalltbl2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -347,9 +339,6 @@ POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:str POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 is a root stage @@ -357,51 +346,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - firstjoin:bigtbl + Map Operator Tree: TableScan alias: bigtbl + Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col1 - Position of Big Table: 0 + Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col1 + Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col1]] - 1 [Column[value]] - Position of Big Table: 0 + 0 _col1 (type: string) + 1 value (type: string) + Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -415,24 +399,24 @@ STAGE PLANS: firstjoin:smalltbl1 TableScan alias: smalltbl1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE smalltbl2 TableScan alias: smalltbl2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -513,9 +497,6 @@ POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:str POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL smallTbl2) key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 is a root stage @@ -523,65 +504,52 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - firstjoin:bigtbl + Map Operator Tree: TableScan alias: bigtbl + Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col1 - Position of Big Table: 0 + Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col1 + Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col1]] - 1 [Column[value]] + 0 _col1 (type: string) + 1 value (type: string) outputColumnNames: _col3 - Position of Big Table: 0 + Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col3 - type: string + expressions: _col3 (type: string) outputColumnNames: _col3 + Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col3 - type: string + aggregations: count() + keys: _col3 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -595,27 +563,25 @@ STAGE PLANS: firstjoin:smalltbl1 TableScan alias: smalltbl1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE smalltbl2 TableScan alias: smalltbl2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 3025 Data size: 32137 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: bigint + expressions: _col1 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 3025 Data size: 32137 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3025 Data size: 32137 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -846,9 +812,6 @@ POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:str POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key1) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl1) key) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) join1) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL join1) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl2) key) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value2) value2)))) join2) (TOK_TABREF (TOK_TABNAME smallTbl3)) (= (. (TOK_TABLE_OR_COL join2) key2) (. (TOK_TABLE_OR_COL smallTbl3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key4) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl3) key) key5) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value2) value2)))) join3) (TOK_TABREF (TOK_TABNAME smallTbl4)) (= (. (TOK_TABLE_OR_COL join3) key3) (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key3)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key4)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key5)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value2))))))) - STAGE DEPENDENCIES: Stage-20 is a root stage , consists of Stage-18, Stage-19, Stage-5 Stage-18 has a backup stage: Stage-5 @@ -875,8 +838,7 @@ STAGE PLANS: Stage: Stage-18 Map Reduce - Alias -> Map Operator Tree: - join3:join2:join1:bigtbl + Map Operator Tree: TableScan alias: bigtbl Map Join Operator @@ -885,28 +847,15 @@ STAGE PLANS: condition expressions: 0 {key1} {key2} {value} 1 {key} - handleSkewJoin: false keys: - 0 [Column[key1]] - 1 [Column[key]] + 0 key1 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - expr: _col2 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -927,8 +876,7 @@ STAGE PLANS: Stage: Stage-15 Map Reduce - Alias -> Map Operator Tree: - join3:join2:$INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -936,30 +884,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col3]] - 1 [Column[value]] + 0 _col3 (type: string) + 1 value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: string - expr: _col3 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -980,8 +913,7 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: - join3:$INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -989,32 +921,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col1]] - 1 [Column[key]] + 0 _col1 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col6 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1035,8 +950,7 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -1044,47 +958,19 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col2]] - 1 [Column[key]] + 0 _col2 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col7 - type: string - expr: _col5 - type: string - expr: _col6 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - expr: sum(hash(_col4)) - expr: sum(hash(_col7)) - expr: sum(hash(_col5)) - expr: sum(hash(_col6)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1102,65 +988,25 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: sum(VALUE._col3) - expr: sum(VALUE._col4) - expr: sum(VALUE._col5) - expr: sum(VALUE._col6) - expr: sum(VALUE._col7) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1168,8 +1014,7 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: - smalltbl4 + Map Operator Tree: TableScan alias: smalltbl4 Map Join Operator @@ -1178,47 +1023,19 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col2]] - 1 [Column[key]] + 0 _col2 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col7 - type: string - expr: _col5 - type: string - expr: _col6 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - expr: sum(hash(_col4)) - expr: sum(hash(_col7)) - expr: sum(hash(_col5)) - expr: sum(hash(_col6)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1235,48 +1052,23 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col2 - type: string + key expressions: _col2 (type: string) sort order: + - Map-reduce partition columns: - expr: _col2 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - smalltbl4 + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) TableScan alias: smalltbl4 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1284,43 +1076,19 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col7 - type: string - expr: _col5 - type: string - expr: _col6 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 + Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - expr: sum(hash(_col4)) - expr: sum(hash(_col7)) - expr: sum(hash(_col5)) - expr: sum(hash(_col6)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1328,8 +1096,7 @@ STAGE PLANS: Stage: Stage-13 Map Reduce - Alias -> Map Operator Tree: - join3:smalltbl3 + Map Operator Tree: TableScan alias: smalltbl3 Map Join Operator @@ -1338,32 +1105,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col1]] - 1 [Column[key]] + 0 _col1 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col6 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1380,46 +1130,23 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - join3:$INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - join3:smalltbl3 + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) TableScan alias: smalltbl3 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1427,28 +1154,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col6 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1456,8 +1169,7 @@ STAGE PLANS: Stage: Stage-16 Map Reduce - Alias -> Map Operator Tree: - join3:join2:smalltbl2 + Map Operator Tree: TableScan alias: smalltbl2 Map Join Operator @@ -1466,30 +1178,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col3]] - 1 [Column[value]] + 0 _col3 (type: string) + 1 value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: string - expr: _col3 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1506,44 +1203,23 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - join3:join2:$INTNAME + Map Operator Tree: TableScan + alias: smalltbl2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col3 - type: string + key expressions: value (type: string) sort order: + - Map-reduce partition columns: - expr: _col3 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - join3:join2:smalltbl2 + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: smalltbl2 Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: _col3 (type: string) sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1551,26 +1227,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: string - expr: _col3 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1578,8 +1242,7 @@ STAGE PLANS: Stage: Stage-19 Map Reduce - Alias -> Map Operator Tree: - join3:join2:join1:smalltbl1 + Map Operator Tree: TableScan alias: smalltbl1 Map Join Operator @@ -1588,28 +1251,15 @@ STAGE PLANS: condition expressions: 0 {key1} {key2} {value} 1 {key} - handleSkewJoin: false keys: - 0 [Column[key1]] - 1 [Column[key]] + 0 key1 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - expr: _col2 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1627,41 +1277,25 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - join3:join2:join1:bigtbl + Map Operator Tree: TableScan alias: bigtbl + Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key1 - type: string + key expressions: key1 (type: string) sort order: + - Map-reduce partition columns: - expr: key1 - type: string - tag: 0 - value expressions: - expr: key1 - type: string - expr: key2 - type: string - expr: value - type: string - join3:join2:join1:smalltbl1 + Map-reduce partition columns: key1 (type: string) + Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + value expressions: key1 (type: string), key2 (type: string), value (type: string) TableScan alias: smalltbl1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1669,24 +1303,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col5 + Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - expr: _col2 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1871,9 +1495,6 @@ POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:str POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key1) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl1) key) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) join1) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL join1) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl2) key) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value2) value2)))) join2) (TOK_TABREF (TOK_TABNAME smallTbl3)) (= (. (TOK_TABLE_OR_COL join2) key2) (. (TOK_TABLE_OR_COL smallTbl3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key4) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl3) key) key5) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value2) value2)))) join3) (TOK_TABREF (TOK_TABNAME smallTbl4)) (= (. (TOK_TABLE_OR_COL join3) key3) (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key3)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key4)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key5)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value2))))))) - STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 is a root stage @@ -1881,155 +1502,79 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - join3:join2:join1:bigtbl + Map Operator Tree: TableScan alias: bigtbl + Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key1} {key2} {value} 1 {key} - handleSkewJoin: false keys: - 0 [Column[key1]] - 1 [Column[key]] + 0 key1 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col5 - Position of Big Table: 0 + Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - expr: _col2 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col3]] - 1 [Column[value]] + 0 _col3 (type: string) + 1 value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Position of Big Table: 0 + Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: string - expr: _col3 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col1]] - 1 [Column[key]] + 0 _col1 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Position of Big Table: 0 + Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col6 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col2]] - 1 [Column[key]] + 0 _col2 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Position of Big Table: 0 + Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col7 - type: string - expr: _col5 - type: string - expr: _col6 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 + Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - expr: sum(hash(_col4)) - expr: sum(hash(_col7)) - expr: sum(hash(_col5)) - expr: sum(hash(_col6)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -2049,51 +1594,32 @@ STAGE PLANS: join3:join2:join1:smalltbl1 TableScan alias: smalltbl1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE join3:join2:smalltbl2 TableScan alias: smalltbl2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE join3:smalltbl3 TableScan alias: smalltbl3 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE smalltbl4 TableScan alias: smalltbl4 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: sum(VALUE._col3) - expr: sum(VALUE._col4) - expr: sum(VALUE._col5) - expr: sum(VALUE._col6) - expr: sum(VALUE._col7) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2280,9 +1806,6 @@ POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:str POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key1) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl1) key) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) join1) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL join1) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl2) key) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value2) value2)))) join2) (TOK_TABREF (TOK_TABNAME smallTbl3)) (= (. (TOK_TABLE_OR_COL join2) key2) (. (TOK_TABLE_OR_COL smallTbl3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key4) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl3) key) key5) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value2) value2)))) join3) (TOK_TABREF (TOK_TABNAME smallTbl4)) (= (. (TOK_TABLE_OR_COL join3) key3) (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key3)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key4)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key5)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value2))))))) - STAGE DEPENDENCIES: Stage-11 is a root stage Stage-4 depends on stages: Stage-11 @@ -2291,65 +1814,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-11 Map Reduce - Alias -> Map Operator Tree: - join3:join2:join1:bigtbl + Map Operator Tree: TableScan alias: bigtbl + Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key1} {key2} {value} 1 {key} - handleSkewJoin: false keys: - 0 [Column[key1]] - 1 [Column[key]] + 0 key1 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col5 - Position of Big Table: 0 + Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - expr: _col2 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col3]] - 1 [Column[value]] + 0 _col3 (type: string) + 1 value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Position of Big Table: 0 + Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: string - expr: _col3 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2367,14 +1867,15 @@ STAGE PLANS: join3:join2:join1:smalltbl1 TableScan alias: smalltbl1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE join3:join2:smalltbl2 TableScan alias: smalltbl2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - join3:$INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -2382,93 +1883,39 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col1]] - 1 [Column[key]] + 0 _col1 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Position of Big Table: 0 + Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col6 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col2]] - 1 [Column[key]] + 0 _col2 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Position of Big Table: 0 + Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col7 - type: string - expr: _col5 - type: string - expr: _col6 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 + Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - expr: sum(hash(_col4)) - expr: sum(hash(_col7)) - expr: sum(hash(_col5)) - expr: sum(hash(_col6)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -2482,45 +1929,24 @@ STAGE PLANS: join3:smalltbl3 TableScan alias: smalltbl3 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE smalltbl4 TableScan alias: smalltbl4 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: sum(VALUE._col3) - expr: sum(VALUE._col4) - expr: sum(VALUE._col5) - expr: sum(VALUE._col6) - expr: sum(VALUE._col7) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2703,9 +2129,6 @@ POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:str POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key1) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl1) key) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) join1) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL join1) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl2) key) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value2) value2)))) join2) (TOK_TABREF (TOK_TABNAME smallTbl3)) (= (. (TOK_TABLE_OR_COL join2) key2) (. (TOK_TABLE_OR_COL smallTbl3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key4) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl3) key) key5) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value2) value2)))) join3) (TOK_TABREF (TOK_TABNAME smallTbl4)) (= (. (TOK_TABLE_OR_COL join3) key3) (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key3)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key4)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key5)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value2))))))) - STAGE DEPENDENCIES: Stage-20 is a root stage , consists of Stage-18, Stage-19, Stage-5 Stage-18 has a backup stage: Stage-5 @@ -2732,8 +2155,7 @@ STAGE PLANS: Stage: Stage-18 Map Reduce - Alias -> Map Operator Tree: - join3:join2:join1:bigtbl + Map Operator Tree: TableScan alias: bigtbl Map Join Operator @@ -2742,28 +2164,15 @@ STAGE PLANS: condition expressions: 0 {key1} {key2} {value} 1 {key} - handleSkewJoin: false keys: - 0 [Column[key1]] - 1 [Column[key]] + 0 key1 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - expr: _col2 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2784,8 +2193,7 @@ STAGE PLANS: Stage: Stage-15 Map Reduce - Alias -> Map Operator Tree: - join3:join2:$INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -2793,30 +2201,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col3]] - 1 [Column[value]] + 0 _col3 (type: string) + 1 value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: string - expr: _col3 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2837,8 +2230,7 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: - join3:$INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -2846,32 +2238,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col1]] - 1 [Column[key]] + 0 _col1 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col6 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2892,8 +2267,7 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -2901,47 +2275,19 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col2]] - 1 [Column[key]] + 0 _col2 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col7 - type: string - expr: _col5 - type: string - expr: _col6 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - expr: sum(hash(_col4)) - expr: sum(hash(_col7)) - expr: sum(hash(_col5)) - expr: sum(hash(_col6)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2959,65 +2305,25 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - expr: sum(VALUE._col3) - expr: sum(VALUE._col4) - expr: sum(VALUE._col5) - expr: sum(VALUE._col6) - expr: sum(VALUE._col7) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: bigint - expr: _col5 - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3025,8 +2331,7 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: - smalltbl4 + Map Operator Tree: TableScan alias: smalltbl4 Map Join Operator @@ -3035,47 +2340,19 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col2]] - 1 [Column[key]] + 0 _col2 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col7 - type: string - expr: _col5 - type: string - expr: _col6 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - expr: sum(hash(_col4)) - expr: sum(hash(_col7)) - expr: sum(hash(_col5)) - expr: sum(hash(_col6)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3092,48 +2369,23 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col2 - type: string + key expressions: _col2 (type: string) sort order: + - Map-reduce partition columns: - expr: _col2 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - smalltbl4 + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) TableScan alias: smalltbl4 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -3141,43 +2393,19 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col7 - type: string - expr: _col5 - type: string - expr: _col6 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 + Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - expr: sum(hash(_col4)) - expr: sum(hash(_col7)) - expr: sum(hash(_col5)) - expr: sum(hash(_col6)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3185,8 +2413,7 @@ STAGE PLANS: Stage: Stage-13 Map Reduce - Alias -> Map Operator Tree: - join3:smalltbl3 + Map Operator Tree: TableScan alias: smalltbl3 Map Join Operator @@ -3195,32 +2422,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col1]] - 1 [Column[key]] + 0 _col1 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col6 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3237,46 +2447,23 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - join3:$INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - join3:smalltbl3 + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) TableScan alias: smalltbl3 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -3284,28 +2471,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col6 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3313,8 +2486,7 @@ STAGE PLANS: Stage: Stage-16 Map Reduce - Alias -> Map Operator Tree: - join3:join2:smalltbl2 + Map Operator Tree: TableScan alias: smalltbl2 Map Join Operator @@ -3323,30 +2495,15 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col3]] - 1 [Column[value]] + 0 _col3 (type: string) + 1 value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: string - expr: _col3 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3363,44 +2520,23 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - join3:join2:$INTNAME + Map Operator Tree: TableScan + alias: smalltbl2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col3 - type: string + key expressions: value (type: string) sort order: + - Map-reduce partition columns: - expr: _col3 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - join3:join2:smalltbl2 + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: smalltbl2 Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: _col3 (type: string) sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -3408,26 +2544,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: string - expr: _col3 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3435,8 +2559,7 @@ STAGE PLANS: Stage: Stage-19 Map Reduce - Alias -> Map Operator Tree: - join3:join2:join1:smalltbl1 + Map Operator Tree: TableScan alias: smalltbl1 Map Join Operator @@ -3445,28 +2568,15 @@ STAGE PLANS: condition expressions: 0 {key1} {key2} {value} 1 {key} - handleSkewJoin: false keys: - 0 [Column[key1]] - 1 [Column[key]] + 0 key1 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - expr: _col2 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3484,41 +2594,25 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - join3:join2:join1:bigtbl + Map Operator Tree: TableScan alias: bigtbl + Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key1 - type: string + key expressions: key1 (type: string) sort order: + - Map-reduce partition columns: - expr: key1 - type: string - tag: 0 - value expressions: - expr: key1 - type: string - expr: key2 - type: string - expr: value - type: string - join3:join2:join1:smalltbl1 + Map-reduce partition columns: key1 (type: string) + Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + value expressions: key1 (type: string), key2 (type: string), value (type: string) TableScan alias: smalltbl1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -3526,24 +2620,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col5 + Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - expr: _col2 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/multiMapJoin2.q.out ql/src/test/results/clientpositive/multiMapJoin2.q.out index 034ac51..c59a407 100644 --- ql/src/test/results/clientpositive/multiMapJoin2.q.out +++ ql/src/test/results/clientpositive/multiMapJoin2.q.out @@ -14,9 +14,6 @@ FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp ORDER BY tmp.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x1) (TOK_TABREF (TOK_TABNAME src1) y1) (= (. (TOK_TABLE_OR_COL x1) key) (. (TOK_TABLE_OR_COL y1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL tmp) key))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 is a root stage @@ -24,77 +21,65 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:tmp-subquery1:x1 + Map Operator Tree: TableScan - alias: x1 + alias: x2 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - null-subquery2:tmp-subquery2:x2 + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan - alias: x2 + alias: x1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -108,14 +93,17 @@ STAGE PLANS: null-subquery1:tmp-subquery1:y1 TableScan alias: y1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE null-subquery2:tmp-subquery2:y2 TableScan alias: y2 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Extract + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -245,9 +233,6 @@ FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp ORDER BY tmp.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x1) (TOK_TABREF (TOK_TABNAME src1) y1) (= (. (TOK_TABLE_OR_COL x1) key) (. (TOK_TABLE_OR_COL y1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL tmp) key))))) - STAGE DEPENDENCIES: Stage-7 is a root stage Stage-2 depends on stages: Stage-7 @@ -256,30 +241,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:tmp-subquery1:x1 + Map Operator Tree: TableScan alias: x1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -294,62 +276,52 @@ STAGE PLANS: null-subquery1:tmp-subquery1:y1 TableScan alias: y1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - null-subquery2:tmp-subquery2:x2 + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan alias: x2 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -360,11 +332,13 @@ STAGE PLANS: null-subquery2:tmp-subquery2:y2 TableScan alias: y2 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Extract + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -488,9 +462,6 @@ FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp ORDER BY tmp.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL tmp) key))))) - STAGE DEPENDENCIES: Stage-4 is a root stage Stage-2 depends on stages: Stage-4 @@ -499,47 +470,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:tmp-subquery1:x1 + Map Operator Tree: TableScan alias: x1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -547,59 +507,48 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 64 Data size: 6501 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 64 Data size: 6501 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - null-subquery2:tmp-subquery2:x2 + Statistics: Num rows: 64 Data size: 6501 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan alias: x2 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 64 Data size: 6501 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 64 Data size: 6501 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 64 Data size: 6501 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -610,11 +559,13 @@ STAGE PLANS: null-subquery2:tmp-subquery2:y2 TableScan alias: y2 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Extract + Statistics: Num rows: 64 Data size: 6501 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 64 Data size: 6501 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -733,9 +684,6 @@ ON (tmp1.key = tmp2.key) GROUP BY tmp1.key ORDER BY key, cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x1) (TOK_TABREF (TOK_TABNAME src1) y1) (= (. (TOK_TABLE_OR_COL x1) key) (. (TOK_TABLE_OR_COL y1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x2) key)))) tmp2) (= (. (TOK_TABLE_OR_COL tmp1) key) (. (TOK_TABLE_OR_COL tmp2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL tmp1) key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cnt))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-12 depends on stages: Stage-2, Stage-8 , consists of Stage-10, Stage-11, Stage-3 @@ -750,43 +698,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp2:x2 + Map Operator Tree: TableScan alias: x2 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -797,22 +737,19 @@ STAGE PLANS: tmp2:y2 TableScan alias: y2 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -823,8 +760,7 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: - $INTNAME1 + Map Operator Tree: TableScan Map Join Operator condition map: @@ -832,29 +768,20 @@ STAGE PLANS: condition expressions: 0 {_col0} 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count() + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -871,41 +798,27 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -913,27 +826,19 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -941,8 +846,7 @@ STAGE PLANS: Stage: Stage-11 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -950,29 +854,20 @@ STAGE PLANS: condition expressions: 0 {_col0} 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count() + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -989,32 +884,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1022,25 +905,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count() + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1048,43 +926,35 @@ STAGE PLANS: Stage: Stage-8 Map Reduce - Alias -> Map Operator Tree: - tmp1:x1 + Map Operator Tree: TableScan alias: x1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -1095,22 +965,19 @@ STAGE PLANS: tmp1:y1 TableScan alias: y1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1195,9 +1062,6 @@ ON (tmp1.key = tmp2.key) GROUP BY tmp1.key ORDER BY key, cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x1) (TOK_TABREF (TOK_TABNAME src1) y1) (= (. (TOK_TABLE_OR_COL x1) key) (. (TOK_TABLE_OR_COL y1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x2) key)))) tmp2) (= (. (TOK_TABLE_OR_COL tmp1) key) (. (TOK_TABLE_OR_COL tmp2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL tmp1) key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cnt))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -1206,79 +1070,63 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp1:x1 + Map Operator Tree: TableScan - alias: x1 + alias: x2 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - tmp2:x2 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x2 + alias: x1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -1292,107 +1140,92 @@ STAGE PLANS: tmp1:y1 TableScan alias: y1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE tmp2:y2 TableScan alias: y2 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count() + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count() + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1400,27 +1233,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1511,9 +1336,6 @@ ON (tmp1.key = tmp2.key) GROUP BY tmp1.key ORDER BY key, cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x2) key)))) tmp2) (= (. (TOK_TABLE_OR_COL tmp1) key) (. (TOK_TABLE_OR_COL tmp2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL tmp1) key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cnt))))) - STAGE DEPENDENCIES: Stage-7 is a root stage Stage-10 depends on stages: Stage-2, Stage-7 , consists of Stage-8, Stage-9, Stage-3 @@ -1528,47 +1350,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: - tmp1:x1 + Map Operator Tree: TableScan alias: x1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1579,8 +1390,7 @@ STAGE PLANS: Stage: Stage-8 Map Reduce - Alias -> Map Operator Tree: - $INTNAME1 + Map Operator Tree: TableScan Map Join Operator condition map: @@ -1588,29 +1398,20 @@ STAGE PLANS: condition expressions: 0 {_col0} 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count() + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1627,41 +1428,27 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1669,27 +1456,19 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1697,8 +1476,7 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -1706,29 +1484,20 @@ STAGE PLANS: condition expressions: 0 {_col0} 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count() + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1745,32 +1514,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1778,25 +1535,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count() + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 34 Data size: 3459 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1804,43 +1556,35 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp2:x2 + Map Operator Tree: TableScan alias: x2 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -1851,22 +1595,19 @@ STAGE PLANS: tmp2:y2 TableScan alias: y2 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1951,9 +1692,6 @@ ON (tmp1.key = tmp2.key) GROUP BY tmp1.key ORDER BY key, cnt POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) x2) (TOK_TABREF (TOK_TABNAME src1) y2) (= (. (TOK_TABLE_OR_COL x2) key) (. (TOK_TABLE_OR_COL y2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x2) key) key)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x2) key)))) tmp2) (= (. (TOK_TABLE_OR_COL tmp1) key) (. (TOK_TABLE_OR_COL tmp2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) key) key) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL tmp1) key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cnt))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -1962,67 +1700,52 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp1:x1 + Map Operator Tree: TableScan alias: x1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - tmp2:x2 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE TableScan alias: x2 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -2033,104 +1756,88 @@ STAGE PLANS: tmp2:y2 TableScan alias: y2 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator + Statistics: Num rows: 65 Data size: 6609 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 32 Data size: 3253 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 32 Data size: 3253 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 64 Data size: 6506 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count() + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 32 Data size: 3253 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 32 Data size: 3253 Basic stats: COMPLETE Column stats: NONE Mux Operator + Statistics: Num rows: 64 Data size: 6506 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count() + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2138,27 +1845,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2256,9 +1955,6 @@ POSTHOOK: Lineage: part_table PARTITION(partitionid=1).key SIMPLE [(src)src.Fiel POSTHOOK: Lineage: part_table PARTITION(partitionid=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: part_table PARTITION(partitionid=2).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: part_table PARTITION(partitionid=2).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME part_table) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 is a root stage @@ -2266,34 +1962,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 125 Data size: 1261 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 0 + 0 key (type: string) + 1 key (type: string) + Statistics: Num rows: 137 Data size: 1387 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 137 Data size: 1387 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -2304,21 +1997,20 @@ STAGE PLANS: y TableScan alias: y + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2379,9 +2071,6 @@ POSTHOOK: Lineage: part_table PARTITION(partitionid=1).key SIMPLE [(src)src.Fiel POSTHOOK: Lineage: part_table PARTITION(partitionid=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: part_table PARTITION(partitionid=2).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: part_table PARTITION(partitionid=2).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) key)))) tmp) (TOK_TABREF (TOK_TABNAME src) c) (= (. (TOK_TABLE_OR_COL tmp) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) key))))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) key)))) tmp) (TOK_TABREF (TOK_TABNAME src) c) (= (. (TOK_TABLE_OR_COL tmp) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) key)))))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-4 depends on stages: Stage-2, Stage-7 @@ -2391,43 +2080,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:x-subquery2:tmp:b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -2438,22 +2119,19 @@ STAGE PLANS: null-subquery2:x-subquery2:tmp:a TableScan alias: a + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2461,8 +2139,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:x-subquery1:$INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -2470,31 +2147,28 @@ STAGE PLANS: condition expressions: 0 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] + 0 _col0 (type: string) + 1 key (type: string) outputColumnNames: _col1 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - null-subquery2:x-subquery2:$INTNAME TableScan Map Join Operator condition map: @@ -2502,26 +2176,24 @@ STAGE PLANS: condition expressions: 0 1 {key} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] + 0 _col0 (type: string) + 1 key (type: string) outputColumnNames: _col1 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 126 Data size: 12786 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2539,49 +2211,43 @@ STAGE PLANS: null-subquery1:x-subquery1:c TableScan alias: c + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE null-subquery2:x-subquery2:c TableScan alias: c + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:x-subquery1:tmp:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -2592,22 +2258,19 @@ STAGE PLANS: null-subquery1:x-subquery1:tmp:b TableScan alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/multi_insert.q.out ql/src/test/results/clientpositive/multi_insert.q.out index 7fd7c7a..4124e45 100644 --- ql/src/test/results/clientpositive/multi_insert.q.out +++ ql/src/test/results/clientpositive/multi_insert.q.out @@ -18,9 +18,6 @@ from src insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -31,43 +28,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -175,9 +164,6 @@ POSTHOOK: Lineage: src_multi1.key SIMPLE [(src)src.FieldSchema(name:key, type:st POSTHOOK: Lineage: src_multi1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 @@ -198,43 +184,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -265,12 +243,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -279,12 +255,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -321,12 +295,10 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -335,12 +307,10 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -444,9 +414,6 @@ POSTHOOK: Lineage: src_multi2.key SIMPLE [(src)src.FieldSchema(name:key, type:st POSTHOOK: Lineage: src_multi2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -457,43 +424,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -633,9 +592,6 @@ POSTHOOK: Lineage: src_multi2.key SIMPLE [(src)src.FieldSchema(name:key, type:st POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 @@ -656,43 +612,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -723,12 +671,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -737,12 +683,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -779,12 +723,10 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -793,12 +735,10 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -934,9 +874,6 @@ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -947,87 +884,60 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 10) or ((key > 10) and (key < 20))) - type: boolean + predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: -1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 < 10) - type: boolean + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((KEY._col0 > 10) and (KEY._col0 < 20)) - type: boolean + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1176,9 +1086,6 @@ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 @@ -1199,87 +1106,60 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 10) or ((key > 10) and (key < 20))) - type: boolean + predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: -1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 < 10) - type: boolean + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((KEY._col0 > 10) and (KEY._col0 < 20)) - type: boolean + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1310,12 +1190,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1324,12 +1202,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1366,12 +1242,10 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1380,12 +1254,10 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1514,9 +1386,6 @@ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -1527,87 +1396,60 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 10) or ((key > 10) and (key < 20))) - type: boolean + predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: -1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 < 10) - type: boolean + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((KEY._col0 > 10) and (KEY._col0 < 20)) - type: boolean + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1756,9 +1598,6 @@ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 @@ -1779,87 +1618,60 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 10) or ((key > 10) and (key < 20))) - type: boolean + predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: -1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 < 10) - type: boolean + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((KEY._col0 > 10) and (KEY._col0 < 20)) - type: boolean + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1890,12 +1702,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1904,12 +1714,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1946,12 +1754,10 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1960,12 +1766,10 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2094,9 +1898,6 @@ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -2107,100 +1908,80 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:s-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - null-subquery2:s-subquery2:src TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2391,9 +2172,6 @@ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 @@ -2414,100 +2192,80 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:s-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - null-subquery2:s-subquery2:src TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2538,12 +2296,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2552,12 +2308,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2594,12 +2348,10 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2608,12 +2360,10 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2800,9 +2550,6 @@ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -2813,100 +2560,80 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:s-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - null-subquery2:s-subquery2:src TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3129,9 +2856,6 @@ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 @@ -3152,100 +2876,80 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:s-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - null-subquery2:s-subquery2:src TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3276,12 +2980,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3290,12 +2992,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3332,12 +3032,10 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3346,12 +3044,10 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3569,9 +3265,6 @@ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 @@ -3581,60 +3274,48 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 0) - type: boolean + predicate: (key = 0) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: - expr: (key = 2) - type: boolean + predicate: (key = 2) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: - expr: (key = 4) - type: boolean + predicate: (key = 4) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3742,9 +3423,6 @@ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 @@ -3754,60 +3432,48 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 0) - type: boolean + predicate: (key = 0) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: - expr: (key = 2) - type: boolean + predicate: (key = 2) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: - expr: (key = 4) - type: boolean + predicate: (key = 4) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3915,9 +3581,6 @@ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 @@ -3927,60 +3590,48 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 0) - type: boolean + predicate: (key = 0) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: - expr: (key = 2) - type: boolean + predicate: (key = 2) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: - expr: (key = 4) - type: boolean + predicate: (key = 4) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4088,9 +3739,6 @@ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 @@ -4100,60 +3748,48 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 0) - type: boolean + predicate: (key = 0) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: - expr: (key = 2) - type: boolean + predicate: (key = 2) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: - expr: (key = 4) - type: boolean + predicate: (key = 4) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/multi_insert_gby.q.out ql/src/test/results/clientpositive/multi_insert_gby.q.out index 75c0710..28ab667 100644 --- ql/src/test/results/clientpositive/multi_insert_gby.q.out +++ ql/src/test/results/clientpositive/multi_insert_gby.q.out @@ -22,9 +22,6 @@ INSERT OVERWRITE TABLE e1 INSERT OVERWRITE TABLE e2 SELECT key, COUNT(*) WHERE key>500 GROUP BY key ORDER BY key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT))) (TOK_WHERE (> (TOK_TABLE_OR_COL key) 450)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT))) (TOK_WHERE (> (TOK_TABLE_OR_COL key) 500)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -37,80 +34,59 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 450) or (key > 500)) - type: boolean + predicate: ((key > 450) or (key > 500)) (type: boolean) + Statistics: Num rows: 38 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 38 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: -1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 38 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 38 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 > 450) - type: boolean + predicate: (KEY._col0 > 450) (type: boolean) + Statistics: Num rows: 12 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count() + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator - predicate: - expr: (KEY._col0 > 500) - type: boolean + predicate: (KEY._col0 > 500) (type: boolean) + Statistics: Num rows: 12 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count() + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -118,32 +94,23 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -165,32 +132,23 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -301,9 +259,6 @@ INSERT OVERWRITE TABLE e2 POSTHOOK: type: QUERY POSTHOOK: Lineage: e1.count EXPRESSION [] POSTHOOK: Lineage: e2.count EXPRESSION [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT))) (TOK_WHERE (> (TOK_TABLE_OR_COL key) 450)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -316,72 +271,53 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: -1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 > 450) - type: boolean + predicate: (KEY._col0 > 450) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count() + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count() + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -389,32 +325,23 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -436,32 +363,23 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/multi_insert_gby2.q.out ql/src/test/results/clientpositive/multi_insert_gby2.q.out index f719b76..1f1f751 100644 --- ql/src/test/results/clientpositive/multi_insert_gby2.q.out +++ ql/src/test/results/clientpositive/multi_insert_gby2.q.out @@ -24,9 +24,6 @@ INSERT OVERWRITE TABLE e1 INSERT OVERWRITE TABLE e2 SELECT percentile_approx(value, 0.5) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL key)) value)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION percentile_approx (TOK_TABLE_OR_COL value) 0.5))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -38,38 +35,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: UDFToDouble(key) - type: double + expressions: key (type: string), UDFToDouble(key) (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: double + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) Reduce Operator Tree: Extract + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: double + expressions: _col1 (type: double) outputColumnNames: _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -77,52 +64,44 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col1 - type: double - expr: 0.5 - type: double + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), 0.5 (type: double) Reduce Operator Tree: Forward + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: complete outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int + expressions: UDFToInteger(_col0) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 Group By Operator - aggregations: - expr: percentile_approx(VALUE._col0, 0.5) - bucketGroup: false + aggregations: percentile_approx(VALUE._col0, 0.5) mode: complete outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double + expressions: _col0 (type: double) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/multi_insert_gby3.q.out ql/src/test/results/clientpositive/multi_insert_gby3.q.out index 5488fee..e16273b 100644 --- ql/src/test/results/clientpositive/multi_insert_gby3.q.out +++ ql/src/test/results/clientpositive/multi_insert_gby3.q.out @@ -27,9 +27,6 @@ INSERT OVERWRITE TABLE e1 INSERT OVERWRITE TABLE e2 SELECT key, sum(keyD), value group by key, value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL key)) keyD) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL keyD))) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -41,46 +38,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: UDFToDouble(key) - type: double - expr: value - type: string + expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: double - expr: _col2 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -88,72 +67,48 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col2 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToDouble(_col1) - type: double + expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1:0._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double - expr: _col1 - type: string + expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -200,9 +155,6 @@ INSERT OVERWRITE TABLE e2 INSERT OVERWRITE TABLE e1 SELECT key, COUNT(distinct value) group by key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL key)) keyD) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL keyD))) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -214,46 +166,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: UDFToDouble(key) - type: double - expr: value - type: string + expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: double - expr: _col2 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -261,72 +195,48 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col2 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToDouble(_col1) - type: double + expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1:0._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double - expr: _col1 - type: string + expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1732,9 +1642,6 @@ POSTHOOK: Lineage: e2.keyd EXPRESSION [(src)src.FieldSchema(name:key, type:strin POSTHOOK: Lineage: e2.keyd EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: e2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: e2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -1746,87 +1653,56 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT value) - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + aggregations: count(DISTINCT value) + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT key) - bucketGroup: false - keys: - expr: value - type: string - expr: key - type: string + aggregations: count(DISTINCT key) + keys: value (type: string), key (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToDouble(_col1) - type: double + expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1848,43 +1724,28 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToDouble(_col1) - type: double + expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1932,9 +1793,6 @@ POSTHOOK: Lineage: e2.keyd EXPRESSION [(src)src.FieldSchema(name:key, type:strin POSTHOOK: Lineage: e2.keyd EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: e2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: e2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL key)) keyD) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL keyD))) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_TABLE_OR_COL keyD)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL keyD) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 @@ -1948,93 +1806,50 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: UDFToDouble(key) - type: double - expr: value - type: string + expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: double - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) sort order: ++++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: double - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: double - expr: _col2 - type: string + Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT _col1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: double - expr: _col2 - type: string + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string), _col1 (type: double), _col2 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToDouble(_col3) - type: double + expressions: _col0 (type: string), UDFToDouble(_col3) (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2043,72 +1858,48 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col2 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToDouble(_col1) - type: double + expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1:0._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double - expr: _col1 - type: string + expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/multi_insert_lateral_view.q.out ql/src/test/results/clientpositive/multi_insert_lateral_view.q.out index 87ec606..05cb37c 100644 --- ql/src/test/results/clientpositive/multi_insert_lateral_view.q.out +++ ql/src/test/results/clientpositive/multi_insert_lateral_view.q.out @@ -40,9 +40,6 @@ from src_10 insert overwrite table src_lv1 select key, C lateral view explode(array(key+1, key+2)) A as C insert overwrite table src_lv2 select key, C lateral view explode(array(key+3, key+4)) A as C POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL C))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 1) (+ (TOK_TABLE_OR_COL key) 2))) C (TOK_TABALIAS A))))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL C))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 3) (+ (TOK_TABLE_OR_COL key) 4))) C (TOK_TABALIAS A)))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 @@ -63,99 +60,91 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src_10 + Map Operator Tree: TableScan alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: double + expressions: _col0 (type: string), _col4 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 Select Operator - expressions: - expr: array((key + 1),(key + 2)) - type: array + expressions: array((key + 1),(key + 2)) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: double + expressions: _col0 (type: string), _col4 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 Lateral View Forward + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: double + expressions: _col0 (type: string), _col4 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 Select Operator - expressions: - expr: array((key + 3),(key + 4)) - type: array + expressions: array((key + 3),(key + 4)) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: double + expressions: _col0 (type: string), _col4 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -186,12 +175,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -200,12 +187,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -242,12 +227,10 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -256,12 +239,10 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -380,9 +361,6 @@ POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL C)))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 1) (+ (TOK_TABLE_OR_COL key) 2))) C (TOK_TABALIAS A)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL C)))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 3) (+ (TOK_TABLE_OR_COL key) 4))) C (TOK_TABALIAS A)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -394,166 +372,126 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src_10 + Map Operator Tree: TableScan alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: double + expressions: _col0 (type: string), _col4 (type: double) outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col4) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: sum(_col4) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Select Operator - expressions: - expr: array((key + 1),(key + 2)) - type: array + expressions: array((key + 1),(key + 2)) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: double + expressions: _col0 (type: string), _col4 (type: double) outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col4) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: sum(_col4) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Lateral View Forward + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: double + expressions: _col0 (type: string), _col4 (type: double) outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col4) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: sum(_col4) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Select Operator - expressions: - expr: array((key + 3),(key + 4)) - type: array + expressions: array((key + 3),(key + 4)) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: double + expressions: _col0 (type: string), _col4 (type: double) outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col4) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: sum(_col4) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -575,41 +513,28 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -733,9 +658,6 @@ POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL C)))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 1) (+ (TOK_TABLE_OR_COL key) 2))) C (TOK_TABALIAS A)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL value)))) (TOK_WHERE (> (TOK_TABLE_OR_COL key) 200)) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL value)))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 200)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 @@ -749,121 +671,88 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - src_10 + Map Operator Tree: TableScan alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: double + expressions: _col0 (type: string), _col4 (type: double) outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col4) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: sum(_col4) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Select Operator - expressions: - expr: array((key + 1),(key + 2)) - type: array + expressions: array((key + 1),(key + 2)) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: double + expressions: _col0 (type: string), _col4 (type: double) outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col4) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: sum(_col4) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Filter Operator - predicate: - expr: ((key > 200) or (key < 200)) - type: boolean + predicate: ((key > 200) or (key < 200)) (type: boolean) + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -885,74 +774,54 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: -1 - value expressions: - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 > 200) - type: boolean + predicate: (KEY._col0 > 200) (type: boolean) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 Filter Operator - predicate: - expr: (KEY._col0 < 200) - type: boolean + predicate: (KEY._col0 < 200) (type: boolean) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1143,9 +1012,6 @@ POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:INP POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL C)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL key)))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 1) (+ (TOK_TABLE_OR_COL key) 2))) C (TOK_TABALIAS A)))) (TOK_GROUPBY (TOK_TABLE_OR_COL C))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL C)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL key)))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 3) (+ (TOK_TABLE_OR_COL key) 4))) C (TOK_TABALIAS A)))) (TOK_GROUPBY (TOK_TABLE_OR_COL C))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 @@ -1160,203 +1026,142 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - src_10 + Map Operator Tree: TableScan alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: double - expr: _col0 - type: string + expressions: _col4 (type: double), _col0 (type: string) outputColumnNames: _col4, _col0 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(DISTINCT _col0) - bucketGroup: false - keys: - expr: _col4 - type: double - expr: _col0 - type: string + aggregations: sum(DISTINCT _col0) + keys: _col4 (type: double), _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double - expr: _col1 - type: string + key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: double - tag: -1 - value expressions: - expr: _col2 - type: double + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) Select Operator - expressions: - expr: array((key + 1),(key + 2)) - type: array + expressions: array((key + 1),(key + 2)) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: double - expr: _col0 - type: string + expressions: _col4 (type: double), _col0 (type: string) outputColumnNames: _col4, _col0 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(DISTINCT _col0) - bucketGroup: false - keys: - expr: _col4 - type: double - expr: _col0 - type: string + aggregations: sum(DISTINCT _col0) + keys: _col4 (type: double), _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double - expr: _col1 - type: string + key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: double - tag: -1 - value expressions: - expr: _col2 - type: double + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) Lateral View Forward + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: double - expr: _col0 - type: string + expressions: _col4 (type: double), _col0 (type: string) outputColumnNames: _col4, _col0 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(DISTINCT _col0) - bucketGroup: false - keys: - expr: _col4 - type: double - expr: _col0 - type: string + aggregations: sum(DISTINCT _col0) + keys: _col4 (type: double), _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Select Operator - expressions: - expr: array((key + 3),(key + 4)) - type: array + expressions: array((key + 3),(key + 4)) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: double - expr: _col0 - type: string + expressions: _col4 (type: double), _col0 (type: string) outputColumnNames: _col4, _col0 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(DISTINCT _col0) - bucketGroup: false - keys: - expr: _col4 - type: double - expr: _col0 - type: string + aggregations: sum(DISTINCT _col0) + keys: _col4 (type: double), _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(DISTINCT key) - bucketGroup: false - keys: - expr: value - type: string - expr: key - type: string + aggregations: sum(DISTINCT key) + keys: value (type: string), key (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: double + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: double + expressions: _col0 (type: double), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1378,43 +1183,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: double - expr: _col1 - type: string + key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: double - tag: -1 - value expressions: - expr: _col2 - type: double + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: double + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: double + expressions: _col0 (type: double), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1436,43 +1226,28 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col2 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1725,9 +1500,6 @@ POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, ty POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL C)))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 1) (+ (TOK_TABLE_OR_COL key) 2))) C (TOK_TABALIAS A)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL C)))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 3) (+ (TOK_TABLE_OR_COL key) 4))) C (TOK_TABALIAS A)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL key)))) (TOK_WHERE (> (TOK_TABLE_OR_COL key) 200)) (TOK_GROUPBY (TOK_TABLE_OR_COL value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv4))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL key)))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 200)) (TOK_GROUPBY (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 @@ -1744,196 +1516,139 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - src_10 + Map Operator Tree: TableScan alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: double + expressions: _col0 (type: string), _col4 (type: double) outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(DISTINCT _col4) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col4 - type: double + aggregations: sum(DISTINCT _col4) + keys: _col0 (type: string), _col4 (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: double + key expressions: _col0 (type: string), _col1 (type: double) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col2 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) Select Operator - expressions: - expr: array((key + 1),(key + 2)) - type: array + expressions: array((key + 1),(key + 2)) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: double + expressions: _col0 (type: string), _col4 (type: double) outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(DISTINCT _col4) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col4 - type: double + aggregations: sum(DISTINCT _col4) + keys: _col0 (type: string), _col4 (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: double + key expressions: _col0 (type: string), _col1 (type: double) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col2 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) Lateral View Forward + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: double + expressions: _col0 (type: string), _col4 (type: double) outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(DISTINCT _col4) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col4 - type: double + aggregations: sum(DISTINCT _col4) + keys: _col0 (type: string), _col4 (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Select Operator - expressions: - expr: array((key + 3),(key + 4)) - type: array + expressions: array((key + 3),(key + 4)) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: double + expressions: _col0 (type: string), _col4 (type: double) outputColumnNames: _col0, _col4 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(DISTINCT _col4) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col4 - type: double + aggregations: sum(DISTINCT _col4) + keys: _col0 (type: string), _col4 (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator - predicate: - expr: ((key > 200) or (key < 200)) - type: boolean + predicate: ((key > 200) or (key < 200)) (type: boolean) + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1955,43 +1670,28 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: double + key expressions: _col0 (type: string), _col1 (type: double) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col2 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2013,73 +1713,53 @@ STAGE PLANS: Stage: Stage-8 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: value - type: string - expr: key - type: string + key expressions: value (type: string), key (type: string) sort order: ++ - Map-reduce partition columns: - expr: value - type: string - tag: -1 + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col1:0._col0 > 200) - type: boolean + predicate: (KEY._col1:0._col0 > 200) (type: boolean) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 Filter Operator - predicate: - expr: (KEY._col1:0._col0 < 200) - type: boolean + predicate: (KEY._col1:0._col0 < 200) (type: boolean) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 4 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out index e264abb..987ae94 100644 --- ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out +++ ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out @@ -18,9 +18,6 @@ from src insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-4 depends on stages: Stage-2 @@ -32,43 +29,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -179,9 +168,6 @@ POSTHOOK: Lineage: src_multi1.key SIMPLE [(src)src.FieldSchema(name:key, type:st POSTHOOK: Lineage: src_multi1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 @@ -203,43 +189,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -286,12 +264,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -300,12 +276,10 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -329,12 +303,10 @@ STAGE PLANS: Stage: Stage-11 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -343,12 +315,10 @@ STAGE PLANS: Stage: Stage-13 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -452,9 +422,6 @@ POSTHOOK: Lineage: src_multi2.key SIMPLE [(src)src.FieldSchema(name:key, type:st POSTHOOK: Lineage: src_multi2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-4 depends on stages: Stage-2 @@ -466,43 +433,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -645,9 +604,6 @@ POSTHOOK: Lineage: src_multi2.key SIMPLE [(src)src.FieldSchema(name:key, type:st POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 @@ -669,43 +625,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -752,12 +700,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -766,12 +712,10 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -795,12 +739,10 @@ STAGE PLANS: Stage: Stage-11 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -809,12 +751,10 @@ STAGE PLANS: Stage: Stage-13 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -950,9 +890,6 @@ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-4 depends on stages: Stage-2 @@ -964,87 +901,60 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 10) or ((key > 10) and (key < 20))) - type: boolean + predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: -1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 < 10) - type: boolean + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((KEY._col0 > 10) and (KEY._col0 < 20)) - type: boolean + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1196,9 +1106,6 @@ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 @@ -1220,87 +1127,60 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 10) or ((key > 10) and (key < 20))) - type: boolean + predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: -1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 < 10) - type: boolean + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((KEY._col0 > 10) and (KEY._col0 < 20)) - type: boolean + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1347,12 +1227,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1361,12 +1239,10 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1390,12 +1266,10 @@ STAGE PLANS: Stage: Stage-11 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1404,12 +1278,10 @@ STAGE PLANS: Stage: Stage-13 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1538,9 +1410,6 @@ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-4 depends on stages: Stage-2 @@ -1552,87 +1421,60 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 10) or ((key > 10) and (key < 20))) - type: boolean + predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: -1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 < 10) - type: boolean + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((KEY._col0 > 10) and (KEY._col0 < 20)) - type: boolean + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1784,9 +1626,6 @@ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 @@ -1808,87 +1647,60 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 10) or ((key > 10) and (key < 20))) - type: boolean + predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: -1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 < 10) - type: boolean + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((KEY._col0 > 10) and (KEY._col0 < 20)) - type: boolean + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1935,12 +1747,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1949,12 +1759,10 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1978,12 +1786,10 @@ STAGE PLANS: Stage: Stage-11 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1992,12 +1798,10 @@ STAGE PLANS: Stage: Stage-13 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2126,9 +1930,6 @@ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-4 depends on stages: Stage-2 @@ -2140,100 +1941,80 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:s-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - null-subquery2:s-subquery2:src TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2427,9 +2208,6 @@ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 @@ -2451,100 +2229,80 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:s-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - null-subquery2:s-subquery2:src TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2591,12 +2349,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2605,12 +2361,10 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2634,12 +2388,10 @@ STAGE PLANS: Stage: Stage-11 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2648,12 +2400,10 @@ STAGE PLANS: Stage: Stage-13 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2840,9 +2590,6 @@ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-4 depends on stages: Stage-2 @@ -2854,100 +2601,80 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:s-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - null-subquery2:s-subquery2:src TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3173,9 +2900,6 @@ POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, typ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_multi2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 10) (< (TOK_TABLE_OR_COL key) 20))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 @@ -3197,100 +2921,80 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:s-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - null-subquery2:s-subquery2:src TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3337,12 +3041,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3351,12 +3053,10 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3380,12 +3080,10 @@ STAGE PLANS: Stage: Stage-11 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3394,12 +3092,10 @@ STAGE PLANS: Stage: Stage-13 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3617,9 +3313,6 @@ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 @@ -3630,60 +3323,48 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 0) - type: boolean + predicate: (key = 0) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: - expr: (key = 2) - type: boolean + predicate: (key = 2) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: - expr: (key = 4) - type: boolean + predicate: (key = 4) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3794,9 +3475,6 @@ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 @@ -3807,60 +3485,48 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 0) - type: boolean + predicate: (key = 0) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: - expr: (key = 2) - type: boolean + predicate: (key = 2) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: - expr: (key = 4) - type: boolean + predicate: (key = 4) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3971,9 +3637,6 @@ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 @@ -3984,60 +3647,48 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 0) - type: boolean + predicate: (key = 0) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: - expr: (key = 2) - type: boolean + predicate: (key = 2) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: - expr: (key = 4) - type: boolean + predicate: (key = 4) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4148,9 +3799,6 @@ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 @@ -4161,60 +3809,48 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 0) - type: boolean + predicate: (key = 0) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: - expr: (key = 2) - type: boolean + predicate: (key = 2) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: - expr: (key = 4) - type: boolean + predicate: (key = 4) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4325,9 +3961,6 @@ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -4339,86 +3972,57 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 10) or ((key > 10) and (key < 20))) - type: boolean + predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: -1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 < 10) - type: boolean + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator - predicate: - expr: ((KEY._col0 > 10) and (KEY._col0 < 20)) - type: boolean + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4426,28 +4030,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4464,28 +4060,20 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4581,9 +4169,6 @@ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -4595,86 +4180,57 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 10) or ((key > 10) and (key < 20))) - type: boolean + predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: -1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 < 10) - type: boolean + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator - predicate: - expr: ((KEY._col0 > 10) and (KEY._col0 < 20)) - type: boolean + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4682,28 +4238,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4720,28 +4268,20 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4837,9 +4377,6 @@ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -4851,86 +4388,57 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 10) or ((key > 10) and (key < 20))) - type: boolean + predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: -1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 < 10) - type: boolean + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator - predicate: - expr: ((KEY._col0 > 10) and (KEY._col0 < 20)) - type: boolean + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4938,28 +4446,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4976,28 +4476,20 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5093,9 +4585,6 @@ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -5107,86 +4596,57 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 10) or ((key > 10) and (key < 20))) - type: boolean + predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: -1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 < 10) - type: boolean + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator - predicate: - expr: ((KEY._col0 > 10) and (KEY._col0 < 20)) - type: boolean + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5194,28 +4654,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5232,28 +4684,20 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5353,9 +4797,6 @@ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-4 is a root stage Stage-6 depends on stages: Stage-4, Stage-8, Stage-9 @@ -5371,124 +4812,87 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 Filter Operator - predicate: - expr: ((key < 10) or ((key > 10) and (key < 20))) - type: boolean + predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: -1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 < 10) - type: boolean + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator - predicate: - expr: ((KEY._col0 > 10) and (KEY._col0 < 20)) - type: boolean + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5525,28 +4929,20 @@ STAGE PLANS: Stage: Stage-8 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5560,28 +4956,20 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5804,9 +5192,6 @@ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-4 is a root stage Stage-6 depends on stages: Stage-4, Stage-8, Stage-9 @@ -5822,124 +5207,87 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 Filter Operator - predicate: - expr: ((key < 10) or ((key > 10) and (key < 20))) - type: boolean + predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: -1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 < 10) - type: boolean + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator - predicate: - expr: ((KEY._col0 > 10) and (KEY._col0 < 20)) - type: boolean + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5976,28 +5324,20 @@ STAGE PLANS: Stage: Stage-8 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -6011,28 +5351,20 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -6271,9 +5603,6 @@ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-4 is a root stage Stage-11 depends on stages: Stage-4 , consists of Stage-8, Stage-7, Stage-9 @@ -6299,124 +5628,87 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 Filter Operator - predicate: - expr: ((key < 10) or ((key > 10) and (key < 20))) - type: boolean + predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: -1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 < 10) - type: boolean + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator - predicate: - expr: ((KEY._col0 > 10) and (KEY._col0 < 20)) - type: boolean + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -6462,12 +5754,10 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -6476,12 +5766,10 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -6505,12 +5793,10 @@ STAGE PLANS: Stage: Stage-13 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -6519,12 +5805,10 @@ STAGE PLANS: Stage: Stage-15 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -6539,28 +5823,20 @@ STAGE PLANS: Stage: Stage-18 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -6574,28 +5850,20 @@ STAGE PLANS: Stage: Stage-19 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -6850,9 +6118,6 @@ POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_multi2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-4 is a root stage Stage-11 depends on stages: Stage-4 , consists of Stage-8, Stage-7, Stage-9 @@ -6878,124 +6143,87 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 Filter Operator - predicate: - expr: ((key < 10) or ((key > 10) and (key < 20))) - type: boolean + predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: -1 + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (KEY._col0 < 10) - type: boolean + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator - predicate: - expr: ((KEY._col0 > 10) and (KEY._col0 < 20)) - type: boolean + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -7041,12 +6269,10 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -7055,12 +6281,10 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -7084,12 +6308,10 @@ STAGE PLANS: Stage: Stage-13 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -7098,12 +6320,10 @@ STAGE PLANS: Stage: Stage-15 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -7118,28 +6338,20 @@ STAGE PLANS: Stage: Stage-18 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -7153,28 +6365,20 @@ STAGE PLANS: Stage: Stage-19 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/multi_join_union.q.out ql/src/test/results/clientpositive/multi_join_union.q.out index be92318..a67354c 100644 --- ql/src/test/results/clientpositive/multi_join_union.q.out +++ ql/src/test/results/clientpositive/multi_join_union.q.out @@ -36,9 +36,6 @@ src11 a JOIN src12 b ON (a.key = b.key) JOIN (SELECT * FROM (SELECT * FROM src13 UNION ALL SELECT * FROM src14)a )c ON c.value = b.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src11) a) (TOK_TABREF (TOK_TABNAME src12) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src13))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src14))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) c) (= (. (TOK_TABLE_OR_COL c) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-8 is a root stage Stage-6 depends on stages: Stage-8 @@ -58,106 +55,81 @@ STAGE PLANS: c-subquery1:a-subquery1:src13 TableScan alias: src13 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 {_col4} {_col5} {_col0} {_col1} 1 {_col0} {_col1} - handleSkewJoin: false keys: - 0 [Column[_col5]] - 1 [Column[_col1]] - Position of Big Table: 0 + 0 _col5 (type: string) + 1 _col1 (type: string) c-subquery2:a-subquery2:src14 TableScan alias: src14 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 {_col4} {_col5} {_col0} {_col1} 1 {_col0} {_col1} - handleSkewJoin: false keys: - 0 [Column[_col5]] - 1 [Column[_col1]] - Position of Big Table: 0 + 0 _col5 (type: string) + 1 _col1 (type: string) Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col4} {_col5} {_col0} {_col1} 1 {_col0} {_col1} - handleSkewJoin: false keys: - 0 [Column[_col5]] - 1 [Column[_col1]] + 0 _col5 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 0 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -174,6 +146,7 @@ STAGE PLANS: a TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE c-subquery1:a-subquery1:src13 c-subquery2:a-subquery2:src14 diff --git ql/src/test/results/clientpositive/multigroupby_singlemr.q.out ql/src/test/results/clientpositive/multigroupby_singlemr.q.out index 25d44b2..9002cfa 100644 --- ql/src/test/results/clientpositive/multigroupby_singlemr.q.out +++ ql/src/test/results/clientpositive/multigroupby_singlemr.q.out @@ -33,9 +33,6 @@ FROM TBL INSERT OVERWRITE TABLE DEST1 SELECT TBL.C1, COUNT(TBL.C2) GROUP BY TBL.C1 INSERT OVERWRITE TABLE DEST2 SELECT TBL.C1, TBL.C2, COUNT(TBL.C3) GROUP BY TBL.C1, TBL.C2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME TBL))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C1)) (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL TBL) C2)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL TBL) C1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C2)) (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL TBL) C3)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL TBL) C1) (. (TOK_TABLE_OR_COL TBL) C2)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -47,85 +44,56 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: c1 - type: int - expr: c2 - type: int + expressions: c1 (type: int), c2 (type: int) outputColumnNames: c1, c2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(c2) - bucketGroup: false - keys: - expr: c1 - type: int + aggregations: count(c2) + keys: c1 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) Select Operator - expressions: - expr: c1 - type: int - expr: c2 - type: int - expr: c3 - type: int + expressions: c1 (type: int), c2 (type: int), c3 (type: int) outputColumnNames: c1, c2, c3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(c3) - bucketGroup: false - keys: - expr: c1 - type: int - expr: c2 - type: int + aggregations: count(c3) + keys: c1 (type: int), c2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -147,49 +115,28 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: int), _col1 (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -219,9 +166,6 @@ FROM TBL INSERT OVERWRITE TABLE DEST1 SELECT TBL.C1, COUNT(TBL.C2) GROUP BY TBL.C1 INSERT OVERWRITE TABLE DEST2 SELECT TBL.C1, TBL.C2, COUNT(TBL.C3) GROUP BY TBL.C2, TBL.C1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME TBL))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C1)) (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL TBL) C2)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL TBL) C1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C2)) (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL TBL) C3)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL TBL) C2) (. (TOK_TABLE_OR_COL TBL) C1)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -233,85 +177,56 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: c1 - type: int - expr: c2 - type: int + expressions: c1 (type: int), c2 (type: int) outputColumnNames: c1, c2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(c2) - bucketGroup: false - keys: - expr: c1 - type: int + aggregations: count(c2) + keys: c1 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) Select Operator - expressions: - expr: c2 - type: int - expr: c1 - type: int - expr: c3 - type: int + expressions: c2 (type: int), c1 (type: int), c3 (type: int) outputColumnNames: c2, c1, c3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(c3) - bucketGroup: false - keys: - expr: c2 - type: int - expr: c1 - type: int + aggregations: count(c3) + keys: c2 (type: int), c1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -333,49 +248,28 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: int - expr: _col0 - type: int - expr: UDFToInteger(_col2) - type: int + expressions: _col1 (type: int), _col0 (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -405,9 +299,6 @@ FROM TBL INSERT OVERWRITE TABLE DEST3 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP BY TBL.C1, TBL.C2, TBL.C3 INSERT OVERWRITE TABLE DEST2 SELECT TBL.C1, TBL.C2, COUNT(TBL.C3) GROUP BY TBL.C1, TBL.C2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME TBL))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST3))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C3)) (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL TBL) C4)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL TBL) C1) (. (TOK_TABLE_OR_COL TBL) C2) (. (TOK_TABLE_OR_COL TBL) C3))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C2)) (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL TBL) C3)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL TBL) C1) (. (TOK_TABLE_OR_COL TBL) C2)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -419,109 +310,56 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: c1 - type: int - expr: c2 - type: int - expr: c3 - type: int - expr: c4 - type: int + expressions: c1 (type: int), c2 (type: int), c3 (type: int), c4 (type: int) outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(c4) - bucketGroup: false - keys: - expr: c1 - type: int - expr: c2 - type: int - expr: c3 - type: int + aggregations: count(c4) + keys: c1 (type: int), c2 (type: int), c3 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: int + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: int - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col3 (type: bigint) Select Operator - expressions: - expr: c1 - type: int - expr: c2 - type: int - expr: c3 - type: int + expressions: c1 (type: int), c2 (type: int), c3 (type: int) outputColumnNames: c1, c2, c3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(c3) - bucketGroup: false - keys: - expr: c1 - type: int - expr: c2 - type: int + aggregations: count(c3) + keys: c1 (type: int), c2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int - expr: KEY._col2 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: int - expr: UDFToInteger(_col3) - type: int + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -543,49 +381,28 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: int), _col1 (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -615,9 +432,6 @@ FROM TBL INSERT OVERWRITE TABLE DEST3 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP BY TBL.C1, TBL.C2, TBL.C3 INSERT OVERWRITE TABLE DEST4 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP BY TBL.C1, TBL.C3, TBL.C2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME TBL))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST3))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C3)) (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL TBL) C4)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL TBL) C1) (. (TOK_TABLE_OR_COL TBL) C2) (. (TOK_TABLE_OR_COL TBL) C3))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST4))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C3)) (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL TBL) C4)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL TBL) C1) (. (TOK_TABLE_OR_COL TBL) C3) (. (TOK_TABLE_OR_COL TBL) C2)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -628,102 +442,54 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: c1 - type: int - expr: c2 - type: int - expr: c3 - type: int - expr: c4 - type: int + expressions: c1 (type: int), c2 (type: int), c3 (type: int), c4 (type: int) outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: c1 - type: int - expr: c2 - type: int - expr: c3 - type: int + key expressions: c1 (type: int), c2 (type: int), c3 (type: int) sort order: +++ - Map-reduce partition columns: - expr: c1 - type: int - expr: c2 - type: int - expr: c3 - type: int - tag: -1 - value expressions: - expr: c4 - type: int + Map-reduce partition columns: c1 (type: int), c2 (type: int), c3 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: c4 (type: int) Reduce Operator Tree: Forward + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int - expr: KEY._col2 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: int - expr: UDFToInteger(_col3) - type: int + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col2 - type: int - expr: KEY._col1 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col2 (type: int), KEY._col1 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col2 - type: int - expr: _col1 - type: int - expr: UDFToInteger(_col3) - type: int + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -768,9 +534,6 @@ INSERT OVERWRITE TABLE DEST3 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP INSERT OVERWRITE TABLE DEST2 SELECT TBL.C1, TBL.C2, COUNT(TBL.C3) GROUP BY TBL.C1, TBL.C2 INSERT OVERWRITE TABLE DEST1 SELECT TBL.C1, COUNT(TBL.C2) GROUP BY TBL.C1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME TBL))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST3))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C3)) (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL TBL) C4)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL TBL) C1) (. (TOK_TABLE_OR_COL TBL) C2) (. (TOK_TABLE_OR_COL TBL) C3))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C2)) (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL TBL) C3)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL TBL) C1) (. (TOK_TABLE_OR_COL TBL) C2))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL TBL) C1)) (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL TBL) C2)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL TBL) C1)))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 @@ -785,132 +548,72 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: c1 - type: int - expr: c2 - type: int - expr: c3 - type: int - expr: c4 - type: int + expressions: c1 (type: int), c2 (type: int), c3 (type: int), c4 (type: int) outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(c4) - bucketGroup: false - keys: - expr: c1 - type: int - expr: c2 - type: int - expr: c3 - type: int + aggregations: count(c4) + keys: c1 (type: int), c2 (type: int), c3 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: int + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: int - tag: -1 - value expressions: - expr: _col3 - type: bigint + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col3 (type: bigint) Select Operator - expressions: - expr: c1 - type: int - expr: c2 - type: int - expr: c3 - type: int + expressions: c1 (type: int), c2 (type: int), c3 (type: int) outputColumnNames: c1, c2, c3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(c3) - bucketGroup: false - keys: - expr: c1 - type: int - expr: c2 - type: int + aggregations: count(c3) + keys: c1 (type: int), c2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Select Operator - expressions: - expr: c1 - type: int - expr: c2 - type: int + expressions: c1 (type: int), c2 (type: int) outputColumnNames: c1, c2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(c2) - bucketGroup: false - keys: - expr: c1 - type: int + aggregations: count(c2) + keys: c1 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int - expr: KEY._col2 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: int - expr: UDFToInteger(_col3) - type: int + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -932,49 +635,28 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: UDFToInteger(_col2) - type: int + expressions: _col0 (type: int), _col1 (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -996,41 +678,28 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/no_hooks.q.out ql/src/test/results/clientpositive/no_hooks.q.out index 0546dc6..795ee88 100644 --- ql/src/test/results/clientpositive/no_hooks.q.out +++ ql/src/test/results/clientpositive/no_hooks.q.out @@ -1,9 +1,6 @@ POSTHOOK: query: EXPLAIN SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) src1) (TOK_TABREF (TOK_TABNAME src) src2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (< (. (TOK_TABLE_OR_COL src1) key) 10) (< (. (TOK_TABLE_OR_COL src2) key) 10))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -12,37 +9,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1 + Map Operator Tree: TableScan - alias: src1 + alias: src2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - src2 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: src2 + alias: src1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -50,22 +37,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -73,35 +52,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: ++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + Statistics: Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 1983 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/noalias_subq1.q.out ql/src/test/results/clientpositive/noalias_subq1.q.out index c738c55..64c3536 100644 --- ql/src/test/results/clientpositive/noalias_subq1.q.out +++ ql/src/test/results/clientpositive/noalias_subq1.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT c1 FROM (select value as c1, key as c2 from src) x where c2 < 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value) c1) (TOK_SELEXPR (TOK_TABLE_OR_COL key) c2)))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL c1))) (TOK_WHERE (< (TOK_TABLE_OR_COL c2) 100)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,22 +11,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/nonblock_op_deduplicate.q.out ql/src/test/results/clientpositive/nonblock_op_deduplicate.q.out index cc5126e..98900ad 100644 --- ql/src/test/results/clientpositive/nonblock_op_deduplicate.q.out +++ ql/src/test/results/clientpositive/nonblock_op_deduplicate.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- negative, references twice for result of funcion explain select nkey, nkey + 1 from (select key + 1 as nkey, value from src) a POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) 1) nkey) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL nkey)) (TOK_SELEXPR (+ (TOK_TABLE_OR_COL nkey) 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,25 +11,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key + 1) - type: double + expressions: (key + 1) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: (_col0 + 1) - type: double + expressions: _col0 (type: double), (_col0 + 1) (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -66,9 +59,6 @@ FROM (SELECT tmp2.key as key, tmp2.value as value, tmp3.count as count FROM src1) tmp3 ) tmp4 order by key, value, count POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) tmp1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) tmp2) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count) count)))) tmp3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp2) value) value) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp3) count) count)))) tmp4)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp4) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp4) value) value) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp4) count) count)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -78,38 +68,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp4:tmp3:src1 + Map Operator Tree: TableScan alias: src1 + Statistics: Num rows: 0 Data size: 216 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 216 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -117,33 +102,23 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - tmp4:$INTNAME + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: 1 - value expressions: - expr: _col0 - type: bigint - tmp4:tmp2:tmp1:src1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) TableScan alias: src1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -151,20 +126,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -172,31 +141,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + Statistics: Num rows: 1 Data size: 237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 237 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -275,9 +232,6 @@ FROM (SELECT tmp2.key as key, tmp2.value as value, tmp3.count as count FROM src1) tmp3 ) tmp4 order by key, value, count POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) tmp1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) tmp2) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count) count)))) tmp3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp2) value) value) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp3) count) count)))) tmp4)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp4) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp4) value) value) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp4) count) count)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-6 depends on stages: Stage-1 @@ -287,38 +241,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp4:tmp3:src1 + Map Operator Tree: TableScan alias: src1 + Statistics: Num rows: 0 Data size: 216 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 216 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -334,27 +283,22 @@ STAGE PLANS: tmp4:tmp2:tmp1:src1 TableScan alias: src1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 {_col0} {_col1} 1 {_col0} - handleSkewJoin: false keys: - 0 [] - 1 [] - Position of Big Table: 1 + 0 + 1 Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - tmp4:$INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -362,45 +306,28 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {_col0} - handleSkewJoin: false keys: - 0 [] - 1 [] + 0 + 1 outputColumnNames: _col0, _col1, _col2 - Position of Big Table: 1 + Statistics: Num rows: 1 Data size: 237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + Statistics: Num rows: 1 Data size: 237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 237 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/nonmr_fetch.q.out ql/src/test/results/clientpositive/nonmr_fetch.q.out index 6b56094..617f4bb 100644 --- ql/src/test/results/clientpositive/nonmr_fetch.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- backward compatible (minimal) explain select * from src limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -17,14 +14,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select * from src limit 10 @@ -49,9 +46,6 @@ PREHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' PREHOOK: type: QUERY POSTHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11'))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -62,18 +56,14 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 @@ -102,9 +92,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- negative, select expression explain select key from src limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -112,19 +99,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 1000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -158,9 +146,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- negative, filter on non-partition column explain select * from srcpart where key > 100 limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL key) 100)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -168,29 +153,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 100) - type: boolean + predicate: (key > 100) (type: boolean) + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -232,9 +211,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- negative, table sampling explain select * from src TABLESAMPLE (0.25 PERCENT) limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_PERCENT 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -242,29 +218,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Split Sample: - src - percentage: 0.25 - seed number: 0 Stage: Stage-0 Fetch Operator @@ -294,9 +265,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- backward compatible (more) explain select * from src limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -307,14 +275,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select * from src limit 10 @@ -339,9 +307,6 @@ PREHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' PREHOOK: type: QUERY POSTHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11'))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -352,18 +317,14 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 @@ -392,9 +353,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- select expression explain select cast(key as int) * 10, upper(value) from src limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (* (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL key)) 10)) (TOK_SELEXPR (TOK_FUNCTION upper (TOK_TABLE_OR_COL value)))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -405,14 +363,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (UDFToInteger(key) * 10) - type: int - expr: upper(value) - type: string + expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select cast(key as int) * 10, upper(value) from src limit 10 @@ -439,9 +397,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- filter on non-partition column explain select key from src where key < 100 limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 100)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -452,16 +407,17 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1000 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select key from src where key < 100 limit 10 @@ -488,9 +444,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- select expr for partitioned table explain select key from srcpart where ds='2008-04-08' AND hr='11' limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11'))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -501,12 +454,14 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1000 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select key from srcpart where ds='2008-04-08' AND hr='11' limit 10 @@ -535,9 +490,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- virtual columns explain select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 10 limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -548,20 +500,17 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: BLOCK__OFFSET__INSIDE__FILE - type: bigint + expressions: key (type: string), value (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 100 limit 10 @@ -588,9 +537,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- virtual columns on partitioned table explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_LIMIT 30))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -601,24 +547,17 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string - expr: BLOCK__OFFSET__INSIDE__FILE - type: bigint + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 30 + Statistics: Num rows: 30 Data size: 6000 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30 @@ -673,9 +612,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- bucket sampling explain select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLEBUCKETSAMPLE 1 40 (TOK_TABLE_OR_COL key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -686,19 +622,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((hash(key) & 2147483647) % 40) = 0) - type: boolean + predicate: (((hash(key) & 2147483647) % 40) = 0) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: BLOCK__OFFSET__INSIDE__FILE - type: bigint + expressions: key (type: string), value (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key) @@ -720,9 +651,6 @@ PREHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLE PREHOOK: type: QUERY POSTHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (BUCKET 1 OUT OF 40 ON key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) (TOK_TABLEBUCKETSAMPLE 1 40 (TOK_TABLE_OR_COL key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -733,23 +661,14 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((hash(key) & 2147483647) % 40) = 0) - type: boolean + predicate: (((hash(key) & 2147483647) % 40) = 0) (type: boolean) + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string - expr: BLOCK__OFFSET__INSIDE__FILE - type: bigint + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (BUCKET 1 OUT OF 40 ON key) @@ -802,9 +721,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- split sampling explain select * from src TABLESAMPLE (0.25 PERCENT) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_PERCENT 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -815,13 +731,11 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select * from src TABLESAMPLE (0.25 PERCENT) @@ -838,9 +752,6 @@ PREHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLE PREHOOK: type: QUERY POSTHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) (TOK_TABLESPLITSAMPLE TOK_PERCENT 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -851,19 +762,11 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string - expr: BLOCK__OFFSET__INSIDE__FILE - type: bigint + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT) @@ -896,9 +799,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- non deterministic func explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) "2008-04-09") (> (TOK_FUNCTION rand) 1))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -909,19 +809,14 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (rand() > 1) - type: boolean + predicate: (rand() > 1) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: BLOCK__OFFSET__INSIDE__FILE - type: bigint + expressions: key (type: string), value (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1 @@ -942,9 +837,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- negative, groupby explain select key, count(value) from src group by key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -952,58 +844,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1019,9 +893,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- negative, distinct explain select distinct key, value from src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1029,59 +900,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1097,9 +946,6 @@ PREHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: query: -- negative, CTAS explain create table srcx as select distinct key, value from src POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME srcx) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1109,59 +955,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1178,12 +1002,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: key string, value string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: srcx - isExternal: false Stage: Stage-2 Stats-Aggr Operator @@ -1194,9 +1015,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- negative, analyze explain analyze table src compute statistics POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME src))) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 depends on stages: Stage-0 @@ -1204,10 +1022,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Stage: Stage-1 Stats-Aggr Operator @@ -1218,9 +1036,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- negative, subq explain select a.* from (select * from src) a POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1228,20 +1043,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1257,9 +1069,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- negative, join explain select * from src join src src2 on src.key=src2.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src) src2) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1267,41 +1076,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan - alias: src + alias: src2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - src2 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: src2 + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1309,22 +1102,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out index 28037bd..14fb5d5 100644 --- ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out @@ -2,9 +2,6 @@ PREHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' PREHOOK: type: QUERY POSTHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11'))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -15,27 +12,20 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (* (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL key)) 10)) (TOK_SELEXPR (TOK_FUNCTION upper (TOK_TABLE_OR_COL value)))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -46,23 +36,20 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (UDFToInteger(key) * 10) - type: int - expr: upper(value) - type: string + expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11'))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -70,25 +57,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -102,9 +84,6 @@ PREHOOK: query: explain select cast(key as int) * 10, upper(value) from src limi PREHOOK: type: QUERY POSTHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (* (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL key)) 10)) (TOK_SELEXPR (TOK_FUNCTION upper (TOK_TABLE_OR_COL value)))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -112,21 +91,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (UDFToInteger(key) * 10) - type: int - expr: upper(value) - type: string + expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out index c32843e..b702d91 100644 --- ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out +++ ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out @@ -11,9 +11,6 @@ PREHOOK: query: EXPLAIN INSERT INTO TABLE insert SELECT * FROM src LIMIT 100 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN INSERT INTO TABLE insert SELECT * FROM src LIMIT 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 100))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,39 +19,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -105,9 +97,6 @@ POSTHOOK: query: EXPLAIN INSERT INTO TABLE insert SELECT * FROM src LIMIT 100 POSTHOOK: type: QUERY POSTHOOK: Lineage: insert.as SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: insert.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME insert))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 100))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -116,39 +105,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -218,9 +202,6 @@ POSTHOOK: Lineage: insert.as SIMPLE [(src)src.FieldSchema(name:value, type:strin POSTHOOK: Lineage: insert.as SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: insert.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: insert.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME insert))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -229,39 +210,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/notable_alias1.q.out ql/src/test/results/clientpositive/notable_alias1.q.out index 60d15f3..e1dd01e 100644 --- ql/src/test/results/clientpositive/notable_alias1.q.out +++ ql/src/test/results/clientpositive/notable_alias1.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT '1234', key, count(1) WHERE src.key < 100 group by key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR '1234') (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL src) key) 100)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,62 +19,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: '1234' - type: string - expr: UDFToInteger(_col0) - type: int - expr: UDFToDouble(_col1) - type: double + expressions: '1234' (type: string), UDFToInteger(_col0) (type: int), UDFToDouble(_col1) (type: double) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/notable_alias2.q.out ql/src/test/results/clientpositive/notable_alias2.q.out index 9fd60a6..592b530 100644 --- ql/src/test/results/clientpositive/notable_alias2.q.out +++ ql/src/test/results/clientpositive/notable_alias2.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT '1234', src.key, count(1) WHERE key < 100 group by src.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR '1234') (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,62 +19,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: '1234' - type: string - expr: UDFToInteger(_col0) - type: int - expr: UDFToDouble(_col1) - type: double + expressions: '1234' (type: string), UDFToInteger(_col0) (type: int), UDFToDouble(_col1) (type: double) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/null_cast.q.out ql/src/test/results/clientpositive/null_cast.q.out index 8b4a97f..08fe784 100644 --- ql/src/test/results/clientpositive/null_cast.q.out +++ ql/src/test/results/clientpositive/null_cast.q.out @@ -10,9 +10,6 @@ POSTHOOK: query: EXPLAIN SELECT ARRAY(NULL, 0), ARRAY(NULL, STRUCT(0)) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION ARRAY TOK_NULL 0)) (TOK_SELEXPR (TOK_FUNCTION ARRAY TOK_NULL (TOK_FUNCTION ARRAY))) (TOK_SELEXPR (TOK_FUNCTION ARRAY TOK_NULL (TOK_FUNCTION MAP))) (TOK_SELEXPR (TOK_FUNCTION ARRAY TOK_NULL (TOK_FUNCTION STRUCT 0)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -20,25 +17,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: array(null,0) - type: array - expr: array(null,array()) - type: array> - expr: array(null,map()) - type: array> - expr: array(null,struct(0)) - type: array> + expressions: array(null,0) (type: array), array(null,array()) (type: array>), array(null,map()) (type: array>), array(null,struct(0)) (type: array>) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/nullformat.q.out ql/src/test/results/clientpositive/nullformat.q.out index 82379d7..f3bde00 100644 --- ql/src/test/results/clientpositive/nullformat.q.out +++ ql/src/test/results/clientpositive/nullformat.q.out @@ -35,9 +35,6 @@ PREHOOK: query: EXPLAIN CREATE TABLE null_tab1(a STRING, b STRING) ROW FORMAT DE PREHOOK: type: CREATETABLE POSTHOOK: query: EXPLAIN CREATE TABLE null_tab1(a STRING, b STRING) ROW FORMAT DELIMITED NULL DEFINED AS 'fooNull' POSTHOOK: type: CREATETABLE -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME null_tab1) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL a TOK_STRING) (TOK_TABCOL b TOK_STRING)) (TOK_TABLEROWFORMAT (TOK_SERDEPROPS (TOK_TABLEROWFORMATNULL 'fooNull')))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -46,12 +43,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: a string, b string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: null_tab1 - isExternal: false PREHOOK: query: CREATE TABLE null_tab1(a STRING, b STRING) ROW FORMAT DELIMITED NULL DEFINED AS 'fooNull' PREHOOK: type: CREATETABLE diff --git ql/src/test/results/clientpositive/nullformatCTAS.q.out ql/src/test/results/clientpositive/nullformatCTAS.q.out index 54d7432..df997ce 100644 --- ql/src/test/results/clientpositive/nullformatCTAS.q.out +++ ql/src/test/results/clientpositive/nullformatCTAS.q.out @@ -37,9 +37,6 @@ PREHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: query: EXPLAIN CREATE TABLE null_tab3 ROW FORMAT DELIMITED NULL DEFINED AS 'fooNull' AS SELECT a, b FROM base_tab POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME null_tab3) TOK_LIKETABLE (TOK_TABLEROWFORMAT (TOK_SERDEPROPS (TOK_TABLEROWFORMATNULL 'fooNull'))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME base_tab))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -54,20 +51,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - base_tab + Map Operator Tree: TableScan alias: base_tab + Statistics: Num rows: 0 Data size: 130 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: a - type: string - expr: b - type: string + expressions: a (type: string), b (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 130 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 130 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -93,24 +87,19 @@ STAGE PLANS: Create Table Operator: Create Table columns: a string, b string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: null_tab3 - isExternal: false Stage: Stage-2 Stats-Aggr Operator Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -119,12 +108,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/nullgroup.q.out ql/src/test/results/clientpositive/nullgroup.q.out index 34ead7f..2ac7dea 100644 --- ql/src/test/results/clientpositive/nullgroup.q.out +++ ql/src/test/results/clientpositive/nullgroup.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(1) from src x where x.key > 9999 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x) key) 9999)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,42 +11,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 9999) - type: boolean + predicate: (key > 9999) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: final outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -74,9 +66,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(1) from src x where x.key > 9999 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x) key) 9999)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -84,42 +73,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 9999) - type: boolean + predicate: (key > 9999) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -144,9 +128,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(1) from src x where x.key > 9999 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x) key) 9999)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -155,34 +136,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 9999) - type: boolean + predicate: (key > 9999) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: 1 - type: int + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: 1 (type: int) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: partial1 outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -190,30 +165,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: final outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -238,9 +208,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(1) from src x where x.key > 9999 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x) key) 9999)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -248,36 +215,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 9999) - type: boolean + predicate: (key > 9999) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: 1 - type: int + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: 1 (type: int) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: complete outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/nullgroup2.q.out ql/src/test/results/clientpositive/nullgroup2.q.out index 18170a8..cf31dc1 100644 --- ql/src/test/results/clientpositive/nullgroup2.q.out +++ ql/src/test/results/clientpositive/nullgroup2.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select x.key, count(1) from src x where x.key > 9999 group by x.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x) key) 9999)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -15,53 +12,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 9999) - type: boolean + predicate: (key > 9999) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -69,41 +51,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -127,9 +96,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select x.key, count(1) from src x where x.key > 9999 group by x.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x) key) 9999)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -137,60 +103,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 9999) - type: boolean + predicate: (key > 9999) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -214,9 +163,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select x.key, count(1) from src x where x.key > 9999 group by x.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x) key) 9999)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -225,44 +171,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 9999) - type: boolean + predicate: (key > 9999) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: 1 - type: int + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: 1 (type: int) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(1) + keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -270,41 +204,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -328,9 +249,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select x.key, count(1) from src x where x.key > 9999 group by x.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x) key) 9999)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -338,51 +256,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 9999) - type: boolean + predicate: (key > 9999) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: -1 - value expressions: - expr: 1 - type: int + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: 1 (type: int) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(1) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/nullgroup3.q.out ql/src/test/results/clientpositive/nullgroup3.q.out index 9f24c51..1dee25e 100644 --- ql/src/test/results/clientpositive/nullgroup3.q.out +++ ql/src/test/results/clientpositive/nullgroup3.q.out @@ -23,9 +23,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(1) from tstparttbl POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tstparttbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -33,38 +30,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tstparttbl + Map Operator Tree: TableScan alias: tstparttbl + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -112,9 +105,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(1) from tstparttbl2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tstparttbl2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -122,38 +112,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tstparttbl2 + Map Operator Tree: TableScan alias: tstparttbl2 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -209,9 +195,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(1) from tstparttbl POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tstparttbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -219,38 +202,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tstparttbl + Map Operator Tree: TableScan alias: tstparttbl + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -306,9 +285,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(1) from tstparttbl2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tstparttbl2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -316,38 +292,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tstparttbl2 + Map Operator Tree: TableScan alias: tstparttbl2 + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/nullgroup4.q.out ql/src/test/results/clientpositive/nullgroup4.q.out index d57540f..feae138 100644 --- ql/src/test/results/clientpositive/nullgroup4.q.out +++ ql/src/test/results/clientpositive/nullgroup4.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(1), count(distinct x.value) from src x where x.key = 9999 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL x) value)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 9999)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -15,54 +12,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 9999) - type: boolean + predicate: (key = 9999) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - expr: count(DISTINCT value) - bucketGroup: false - keys: - expr: value - type: string + aggregations: count(1), count(DISTINCT value) + keys: value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: count(DISTINCT KEY._col0:0._col0) - bucketGroup: false + aggregations: count(VALUE._col0), count(DISTINCT KEY._col0:0._col0) mode: partials outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -70,35 +50,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: count(VALUE._col1) - bucketGroup: false + aggregations: count(VALUE._col0), count(VALUE._col1) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -123,9 +93,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(1), count(distinct x.value) from src x where x.key = 9999 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL x) value)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 9999)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -133,58 +100,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 9999) - type: boolean + predicate: (key = 9999) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - expr: count(DISTINCT value) - bucketGroup: false - keys: - expr: value - type: string + aggregations: count(1), count(DISTINCT value) + keys: value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col1 - type: bigint - expr: _col2 - type: bigint + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: count(DISTINCT KEY._col0:0._col0) - bucketGroup: false + aggregations: count(VALUE._col0), count(DISTINCT KEY._col0:0._col0) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -209,9 +159,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(1), count(distinct x.value) from src x where x.key = 9999 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL x) value)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 9999)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -220,42 +167,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 9999) - type: boolean + predicate: (key = 9999) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: value (type: string) sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: -1 - value expressions: - expr: 1 - type: int + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: 1 (type: int) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(1) - expr: count(DISTINCT KEY._col0:0._col0) - bucketGroup: false + aggregations: count(1), count(DISTINCT KEY._col0:0._col0) mode: partial1 outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -263,35 +199,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: count(VALUE._col1) - bucketGroup: false + aggregations: count(VALUE._col0), count(VALUE._col1) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -316,9 +242,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(1), count(distinct x.value) from src x where x.key = 9999 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL x) value)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 9999)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -326,46 +249,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 9999) - type: boolean + predicate: (key = 9999) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: value (type: string) sort order: + - tag: -1 - value expressions: - expr: 1 - type: int + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: 1 (type: int) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(1) - expr: count(DISTINCT KEY._col0:0._col0) - bucketGroup: false + aggregations: count(1), count(DISTINCT KEY._col0:0._col0) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out index dc6beb0..2ee357f 100644 --- ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out +++ ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(1), count(distinct x.value), count(distinct substr(x.value, 5)) from src x where x.key = 9999 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL x) value))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL x) value) 5)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 9999)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,68 +11,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 9999) - type: boolean + predicate: (key = 9999) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - expr: count(DISTINCT value) - expr: count(DISTINCT substr(value, 5)) - bucketGroup: false - keys: - expr: value - type: string - expr: substr(value, 5) - type: string + aggregations: count(1), count(DISTINCT value), count(DISTINCT substr(value, 5)) + keys: value (type: string), substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: bigint + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: count(DISTINCT KEY._col0:0._col0) - expr: count(DISTINCT KEY._col0:1._col0) - bucketGroup: false + aggregations: count(VALUE._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -100,9 +70,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select count(1), count(distinct x.value), count(distinct substr(x.value, 5)) from src x where x.key = 9999 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL x) value))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL x) value) 5)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 9999)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -110,51 +77,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 9999) - type: boolean + predicate: (key = 9999) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string - expr: substr(value, 5) - type: string + key expressions: value (type: string), substr(value, 5) (type: string) sort order: ++ - tag: -1 - value expressions: - expr: 1 - type: int + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: 1 (type: int) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(1) - expr: count(DISTINCT KEY._col0:0._col0) - expr: count(DISTINCT KEY._col0:1._col0) - bucketGroup: false + aggregations: count(1), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/nullgroup5.q.out ql/src/test/results/clientpositive/nullgroup5.q.out index ae8b275..5a4d187 100644 --- ql/src/test/results/clientpositive/nullgroup5.q.out +++ ql/src/test/results/clientpositive/nullgroup5.q.out @@ -38,9 +38,6 @@ select u.* from select key, value from tstparttbl2 y where y.ds='2009-04-09' )u POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tstparttbl) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) ds) '2009-04-05')))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tstparttbl2) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL y) ds) '2009-04-09'))))) u)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME u)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -48,57 +45,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:u-subquery1:x - TableScan - alias: x - Filter Operator - predicate: - expr: (ds = '2009-04-05') - type: boolean - Select Operator - expressions: - expr: key - type: string - expr: value - type: string - outputColumnNames: _col0, _col1 - Union - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - null-subquery2:u-subquery2:y + Map Operator Tree: TableScan alias: y + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/nullscript.q.out ql/src/test/results/clientpositive/nullscript.q.out index b7a98ea..ec8c5e8 100644 --- ql/src/test/results/clientpositive/nullscript.q.out +++ ql/src/test/results/clientpositive/nullscript.q.out @@ -21,9 +21,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select transform(key) using 'cat' as key1 from nullscript POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME nullscript))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_TABLE_OR_COL key)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST key1)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -31,24 +28,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - nullscript + Map Operator Tree: TableScan alias: nullscript + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/num_op_type_conv.q.out ql/src/test/results/clientpositive/num_op_type_conv.q.out index 5b88dff..4926c8b 100644 --- ql/src/test/results/clientpositive/num_op_type_conv.q.out +++ ql/src/test/results/clientpositive/num_op_type_conv.q.out @@ -8,9 +8,6 @@ POSTHOOK: query: EXPLAIN SELECT null + 7, 1.0 - null, null + null, CAST(21 AS BIGINT) % CAST(21 AS BIGINT), 9 % "3" FROM src LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ TOK_NULL 7)) (TOK_SELEXPR (- 1.0 TOK_NULL)) (TOK_SELEXPR (+ TOK_NULL TOK_NULL)) (TOK_SELEXPR (% (TOK_FUNCTION TOK_BIGINT 21) (TOK_FUNCTION TOK_TINYINT 5))) (TOK_SELEXPR (% (TOK_FUNCTION TOK_BIGINT 21) (TOK_FUNCTION TOK_BIGINT 21))) (TOK_SELEXPR (% 9 "3"))) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -21,22 +18,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: (null + 7) - type: double - expr: (1.0 - null) - type: double - expr: (null + null) - type: double - expr: (UDFToLong(21) % UDFToByte(5)) - type: bigint - expr: (UDFToLong(21) % UDFToLong(21)) - type: bigint - expr: (9 % '3') - type: double + expressions: (null + 7) (type: double), (1.0 - null) (type: double), (null + null) (type: double), (UDFToLong(21) % UDFToByte(5)) (type: bigint), (UDFToLong(21) % UDFToLong(21)) (type: bigint), (9 % '3') (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT null + 7, 1.0 - null, null + null, diff --git ql/src/test/results/clientpositive/optional_outer.q.out ql/src/test/results/clientpositive/optional_outer.q.out index 3cfcc36..45c7c69 100644 --- ql/src/test/results/clientpositive/optional_outer.q.out +++ ql/src/test/results/clientpositive/optional_outer.q.out @@ -2,9 +2,6 @@ PREHOOK: query: EXPLAIN SELECT * FROM src a LEFT OUTER JOIN src b on (a.key=b.ke PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM src a LEFT OUTER JOIN src b on (a.key=b.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -12,41 +9,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -54,22 +35,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -83,9 +57,6 @@ PREHOOK: query: EXPLAIN SELECT * FROM src a LEFT JOIN src b on (a.key=b.key) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM src a LEFT JOIN src b on (a.key=b.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -93,41 +64,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -135,22 +90,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -164,9 +112,6 @@ PREHOOK: query: EXPLAIN SELECT * FROM src a RIGHT OUTER JOIN src b on (a.key=b.k PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM src a RIGHT OUTER JOIN src b on (a.key=b.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -174,41 +119,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -216,22 +145,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -245,9 +167,6 @@ PREHOOK: query: EXPLAIN SELECT * FROM src a RIGHT JOIN src b on (a.key=b.key) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM src a RIGHT JOIN src b on (a.key=b.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -255,41 +174,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -297,22 +200,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -326,9 +222,6 @@ PREHOOK: query: EXPLAIN SELECT * FROM src a FULL OUTER JOIN src b on (a.key=b.ke PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM src a FULL OUTER JOIN src b on (a.key=b.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -336,41 +229,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -378,22 +255,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -407,9 +277,6 @@ PREHOOK: query: EXPLAIN SELECT * FROM src a FULL JOIN src b on (a.key=b.key) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM src a FULL JOIN src b on (a.key=b.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -417,41 +284,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -459,22 +310,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/orc_createas1.q.out ql/src/test/results/clientpositive/orc_createas1.q.out index 895d6ba..af392c3 100644 --- ql/src/test/results/clientpositive/orc_createas1.q.out +++ ql/src/test/results/clientpositive/orc_createas1.q.out @@ -55,9 +55,6 @@ POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.Field POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME orc_createas1b) TOK_LIKETABLE TOK_TBLORCFILE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -72,20 +69,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -111,25 +105,20 @@ STAGE PLANS: Create Table Operator: Create Table columns: key string, value string - if not exists: false input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde name: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: orc_createas1b - isExternal: false Stage: Stage-2 Stats-Aggr Operator Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -138,12 +127,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -179,9 +166,6 @@ POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.Field POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME orc_createas1b))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))) (TOK_LIMIT 5))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -189,34 +173,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - orc_createas1b + Map Operator Tree: TableScan alias: orc_createas1b + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -259,9 +237,6 @@ POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.Field POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME orc_createas1c) TOK_LIKETABLE TOK_TBLORCFILE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME orc_createas1a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION PMOD (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key)) 50) part))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -276,22 +251,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - orc_createas1a + Map Operator Tree: TableScan alias: orc_createas1a + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: (hash(key) pmod 50) - type: int + expressions: key (type: int), value (type: string), (hash(key) pmod 50) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -317,25 +287,20 @@ STAGE PLANS: Create Table Operator: Create Table columns: key int, value string, part int - if not exists: false input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde name: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: orc_createas1c - isExternal: false Stage: Stage-2 Stats-Aggr Operator Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -344,12 +309,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out index 2030fbf..0bcfce4 100644 --- ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out +++ ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out @@ -146,9 +146,6 @@ POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type: POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME orc_pred))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL t))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -156,42 +153,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - orc_pred + Map Operator Tree: TableScan alias: orc_pred + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: t - type: tinyint + expressions: t (type: tinyint) outputColumnNames: t + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(t)) - bucketGroup: false + aggregations: sum(hash(t)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false + aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -216,9 +207,6 @@ POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type: POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME orc_pred))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL t))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -226,42 +214,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - orc_pred + Map Operator Tree: TableScan alias: orc_pred + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: t - type: tinyint + expressions: t (type: tinyint) outputColumnNames: t + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(t)) - bucketGroup: false + aggregations: sum(hash(t)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false + aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -432,9 +414,6 @@ POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type: POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME orc_pred))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL t))))) (TOK_WHERE (AND (AND (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL t)) (< (TOK_TABLE_OR_COL t) 0)) (> (TOK_TABLE_OR_COL t) (- 2)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -442,46 +421,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - orc_pred + Map Operator Tree: TableScan alias: orc_pred + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((t is not null and (t < 0)) and (t > (- 2))) - type: boolean + predicate: ((t is not null and (t < 0)) and (t > (- 2))) (type: boolean) + Statistics: Num rows: 58 Data size: 17204 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: t - type: tinyint + expressions: t (type: tinyint) outputColumnNames: t + Statistics: Num rows: 58 Data size: 17204 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(t)) - bucketGroup: false + aggregations: sum(hash(t)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false + aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -512,9 +484,6 @@ POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type: POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME orc_pred))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (TOK_TABLE_OR_COL t))))) (TOK_WHERE (AND (AND (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL t)) (< (TOK_TABLE_OR_COL t) 0)) (> (TOK_TABLE_OR_COL t) (- 2)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -522,49 +491,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - orc_pred + Map Operator Tree: TableScan alias: orc_pred filterExpr: - expr: ((t is not null and (t < 0)) and (t > (- 2))) - type: boolean + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((t is not null and (t < 0)) and (t > (- 2))) - type: boolean + predicate: ((t is not null and (t < 0)) and (t > (- 2))) (type: boolean) + Statistics: Num rows: 58 Data size: 17204 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: t - type: tinyint + expressions: t (type: tinyint) outputColumnNames: t + Statistics: Num rows: 58 Data size: 17204 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(t)) - bucketGroup: false + aggregations: sum(hash(t)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false + aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -655,9 +615,6 @@ POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type: POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME orc_pred))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL t)) (TOK_SELEXPR (TOK_TABLE_OR_COL s))) (TOK_WHERE (AND (AND (<=> (TOK_TABLE_OR_COL t) (- 1)) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL s))) (LIKE (TOK_TABLE_OR_COL s) 'bob%'))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL s))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -665,37 +622,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - orc_pred + Map Operator Tree: TableScan alias: orc_pred + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((t = (- 1)) and s is not null) and (s like 'bob%')) - type: boolean + predicate: (((t = (- 1)) and s is not null) and (s like 'bob%')) (type: boolean) + Statistics: Num rows: 131 Data size: 38859 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: t - type: tinyint - expr: s - type: string + expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 131 Data size: 38859 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: string + Statistics: Num rows: 131 Data size: 38859 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 131 Data size: 38859 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 131 Data size: 38859 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -728,9 +676,6 @@ POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type: POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME orc_pred))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL t)) (TOK_SELEXPR (TOK_TABLE_OR_COL s))) (TOK_WHERE (AND (AND (<=> (TOK_TABLE_OR_COL t) (- 1)) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL s))) (LIKE (TOK_TABLE_OR_COL s) 'bob%'))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL s))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -738,40 +683,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - orc_pred + Map Operator Tree: TableScan alias: orc_pred filterExpr: - expr: (((t = (- 1)) and s is not null) and (s like 'bob%')) - type: boolean + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((t = (- 1)) and s is not null) and (s like 'bob%')) - type: boolean + predicate: (((t = (- 1)) and s is not null) and (s like 'bob%')) (type: boolean) + Statistics: Num rows: 131 Data size: 38859 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: t - type: tinyint - expr: s - type: string + expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 131 Data size: 38859 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: string + Statistics: Num rows: 131 Data size: 38859 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 131 Data size: 38859 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 131 Data size: 38859 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -870,9 +804,6 @@ POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type: POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME orc_pred))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL t)) (TOK_SELEXPR (TOK_TABLE_OR_COL s))) (TOK_WHERE (AND (AND (AND (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL s)) (LIKE (TOK_TABLE_OR_COL s) 'bob%')) (NOT (TOK_FUNCTION IN (TOK_TABLE_OR_COL t) (- 1) (- 2) (- 3)))) (TOK_FUNCTION between KW_FALSE (TOK_TABLE_OR_COL t) 25 30))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL t)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL s))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -880,39 +811,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - orc_pred + Map Operator Tree: TableScan alias: orc_pred + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((s is not null and (s like 'bob%')) and (not (t) IN ((- 1), (- 2), (- 3)))) and t BETWEEN 25 AND 30) - type: boolean + predicate: (((s is not null and (s like 'bob%')) and (not (t) IN ((- 1), (- 2), (- 3)))) and t BETWEEN 25 AND 30) (type: boolean) + Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: t - type: tinyint - expr: s - type: string + expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: string + key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: string + Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -947,9 +867,6 @@ POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type: POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME orc_pred))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL t)) (TOK_SELEXPR (TOK_TABLE_OR_COL s))) (TOK_WHERE (AND (AND (AND (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL s)) (LIKE (TOK_TABLE_OR_COL s) 'bob%')) (NOT (TOK_FUNCTION IN (TOK_TABLE_OR_COL t) (- 1) (- 2) (- 3)))) (TOK_FUNCTION between KW_FALSE (TOK_TABLE_OR_COL t) 25 30))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL t)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL s))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -957,42 +874,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - orc_pred + Map Operator Tree: TableScan alias: orc_pred filterExpr: - expr: (((s is not null and (s like 'bob%')) and (not (t) IN ((- 1), (- 2), (- 3)))) and t BETWEEN 25 AND 30) - type: boolean + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((s is not null and (s like 'bob%')) and (not (t) IN ((- 1), (- 2), (- 3)))) and t BETWEEN 25 AND 30) - type: boolean + predicate: (((s is not null and (s like 'bob%')) and (not (t) IN ((- 1), (- 2), (- 3)))) and t BETWEEN 25 AND 30) (type: boolean) + Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: t - type: tinyint - expr: s - type: string + expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: string + key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: string + Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 65 Data size: 19281 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1115,9 +1019,6 @@ POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type: POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME orc_pred))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL t)) (TOK_SELEXPR (TOK_TABLE_OR_COL si)) (TOK_SELEXPR (TOK_TABLE_OR_COL d)) (TOK_SELEXPR (TOK_TABLE_OR_COL s))) (TOK_WHERE (AND (AND (AND (AND (AND (AND (>= (TOK_TABLE_OR_COL d) (TOK_FUNCTION ROUND 9.99)) (< (TOK_TABLE_OR_COL d) 12)) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL t))) (LIKE (TOK_TABLE_OR_COL s) '%son')) (NOT (LIKE (TOK_TABLE_OR_COL s) '%car%'))) (> (TOK_TABLE_OR_COL t) 0)) (TOK_FUNCTION between KW_FALSE (TOK_TABLE_OR_COL si) 300 400))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL s))) (TOK_LIMIT 3))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1125,46 +1026,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - orc_pred + Map Operator Tree: TableScan alias: orc_pred + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((((d >= round(9.99)) and (d < 12.0)) and t is not null) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) - type: boolean + predicate: (((((((d >= round(9.99)) and (d < 12.0)) and t is not null) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean) + Statistics: Num rows: 2 Data size: 593 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: t - type: tinyint - expr: si - type: smallint - expr: d - type: double - expr: s - type: string + expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 593 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col3 - type: string + key expressions: _col3 (type: string) sort order: - - tag: -1 - value expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: smallint - expr: _col2 - type: double - expr: _col3 - type: string + Statistics: Num rows: 2 Data size: 593 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 593 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 2 Data size: 593 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 2 Data size: 593 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1207,9 +1093,6 @@ POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type: POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME orc_pred))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL t)) (TOK_SELEXPR (TOK_TABLE_OR_COL si)) (TOK_SELEXPR (TOK_TABLE_OR_COL d)) (TOK_SELEXPR (TOK_TABLE_OR_COL s))) (TOK_WHERE (AND (AND (AND (AND (AND (AND (>= (TOK_TABLE_OR_COL d) (TOK_FUNCTION ROUND 9.99)) (< (TOK_TABLE_OR_COL d) 12)) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL t))) (LIKE (TOK_TABLE_OR_COL s) '%son')) (NOT (LIKE (TOK_TABLE_OR_COL s) '%car%'))) (> (TOK_TABLE_OR_COL t) 0)) (TOK_FUNCTION between KW_FALSE (TOK_TABLE_OR_COL si) 300 400))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL s))) (TOK_LIMIT 3))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1217,49 +1100,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - orc_pred + Map Operator Tree: TableScan alias: orc_pred filterExpr: - expr: (((((((d >= round(9.99)) and (d < 12.0)) and t is not null) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) - type: boolean + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((((d >= round(9.99)) and (d < 12.0)) and t is not null) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) - type: boolean + predicate: (((((((d >= round(9.99)) and (d < 12.0)) and t is not null) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean) + Statistics: Num rows: 2 Data size: 593 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: t - type: tinyint - expr: si - type: smallint - expr: d - type: double - expr: s - type: string + expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 593 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col3 - type: string + key expressions: _col3 (type: string) sort order: - - tag: -1 - value expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: smallint - expr: _col2 - type: double - expr: _col3 - type: string + Statistics: Num rows: 2 Data size: 593 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 593 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 2 Data size: 593 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 2 Data size: 593 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1390,9 +1256,6 @@ POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type: POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME orc_pred))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL t)) (TOK_SELEXPR (TOK_TABLE_OR_COL si)) (TOK_SELEXPR (TOK_TABLE_OR_COL d)) (TOK_SELEXPR (TOK_TABLE_OR_COL s))) (TOK_WHERE (AND (AND (AND (AND (AND (AND (AND (AND (> (TOK_TABLE_OR_COL t) 10) (<> (TOK_TABLE_OR_COL t) 101)) (>= (TOK_TABLE_OR_COL d) (TOK_FUNCTION ROUND 9.99))) (< (TOK_TABLE_OR_COL d) 12)) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL t))) (LIKE (TOK_TABLE_OR_COL s) '%son')) (NOT (LIKE (TOK_TABLE_OR_COL s) '%car%'))) (> (TOK_TABLE_OR_COL t) 0)) (TOK_FUNCTION between KW_FALSE (TOK_TABLE_OR_COL si) 300 400))) (TOK_SORTBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL s))) (TOK_LIMIT 3))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1401,46 +1264,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - orc_pred + Map Operator Tree: TableScan alias: orc_pred + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((((((t > 10) and (t <> 101)) and (d >= round(9.99))) and (d < 12.0)) and t is not null) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) - type: boolean + predicate: (((((((((t > 10) and (t <> 101)) and (d >= round(9.99))) and (d < 12.0)) and t is not null) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: t - type: tinyint - expr: si - type: smallint - expr: d - type: double - expr: s - type: string + expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col3 - type: string + key expressions: _col3 (type: string) sort order: - - tag: -1 - value expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: smallint - expr: _col2 - type: double - expr: _col3 - type: string + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1448,30 +1295,22 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col3 - type: string + key expressions: _col3 (type: string) sort order: - - tag: -1 - value expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: smallint - expr: _col2 - type: double - expr: _col3 - type: string + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1518,9 +1357,6 @@ POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type: POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME orc_pred))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL t)) (TOK_SELEXPR (TOK_TABLE_OR_COL si)) (TOK_SELEXPR (TOK_TABLE_OR_COL d)) (TOK_SELEXPR (TOK_TABLE_OR_COL s))) (TOK_WHERE (AND (AND (AND (AND (AND (AND (AND (AND (> (TOK_TABLE_OR_COL t) 10) (<> (TOK_TABLE_OR_COL t) 101)) (>= (TOK_TABLE_OR_COL d) (TOK_FUNCTION ROUND 9.99))) (< (TOK_TABLE_OR_COL d) 12)) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL t))) (LIKE (TOK_TABLE_OR_COL s) '%son')) (NOT (LIKE (TOK_TABLE_OR_COL s) '%car%'))) (> (TOK_TABLE_OR_COL t) 0)) (TOK_FUNCTION between KW_FALSE (TOK_TABLE_OR_COL si) 300 400))) (TOK_SORTBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL s))) (TOK_LIMIT 3))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1529,49 +1365,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - orc_pred + Map Operator Tree: TableScan alias: orc_pred filterExpr: - expr: (((((((((t > 10) and (t <> 101)) and (d >= round(9.99))) and (d < 12.0)) and t is not null) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) - type: boolean + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((((((t > 10) and (t <> 101)) and (d >= round(9.99))) and (d < 12.0)) and t is not null) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) - type: boolean + predicate: (((((((((t > 10) and (t <> 101)) and (d >= round(9.99))) and (d < 12.0)) and t is not null) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: t - type: tinyint - expr: si - type: smallint - expr: d - type: double - expr: s - type: string + expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col3 - type: string + key expressions: _col3 (type: string) sort order: - - tag: -1 - value expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: smallint - expr: _col2 - type: double - expr: _col3 - type: string + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1579,30 +1397,22 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col3 - type: string + key expressions: _col3 (type: string) sort order: - - tag: -1 - value expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: smallint - expr: _col2 - type: double - expr: _col3 - type: string + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/order.q.out ql/src/test/results/clientpositive/order.q.out index 2317a6f..1d0be9d 100644 --- ql/src/test/results/clientpositive/order.q.out +++ ql/src/test/results/clientpositive/order.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.* FROM SRC x ORDER BY key limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,34 +11,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -75,9 +66,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.* FROM SRC x ORDER BY key desc limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -85,34 +73,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: - - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/order2.q.out ql/src/test/results/clientpositive/order2.q.out index 359597e..0b7a203 100644 --- ql/src/test/results/clientpositive/order2.q.out +++ ql/src/test/results/clientpositive/order2.q.out @@ -8,9 +8,6 @@ SELECT subq.key, subq.value FROM (SELECT x.* FROM SRC x ORDER BY key limit 10) subq where subq.key < 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) value))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL subq) key) 10)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -18,45 +15,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq:x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 10) - type: boolean + predicate: (_col0 < 10) (type: boolean) + Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/outer_join_ppr.q.out ql/src/test/results/clientpositive/outer_join_ppr.q.out index 8cb60f7..6a65184 100644 --- ql/src/test/results/clientpositive/outer_join_ppr.q.out +++ ql/src/test/results/clientpositive/outer_join_ppr.q.out @@ -17,7 +17,88 @@ POSTHOOK: query: EXPLAIN EXTENDED WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL b) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 10) (< (. (TOK_TABLE_OR_COL a) key) 20)) (> (. (TOK_TABLE_OR_COL b) key) 15)) (< (. (TOK_TABLE_OR_COL b) key) 25))))) + +TOK_QUERY + TOK_FROM + TOK_FULLOUTERJOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_TABREF + TOK_TABNAME + srcpart + b + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + b + ds + '2008-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -26,53 +107,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + alias: b + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string), value (type: string), ds (type: string) TableScan - alias: b - Statistics: - numRows: 116 dataSize: 23248 basicStatsState: COMPLETE colStatsState: NONE + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 116 dataSize: 23248 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string), value (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -305,37 +362,22 @@ STAGE PLANS: filter predicates: 0 1 {(VALUE._col2 = '2008-04-08')} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 - Statistics: - numRows: 127 dataSize: 25572 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 127 Data size: 25572 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: ((((_col4 > 15) and (_col4 < 25)) and (_col0 > 10)) and (_col0 < 20)) - type: boolean - Statistics: - numRows: 1 dataSize: 201 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((((_col4 > 15) and (_col4 < 25)) and (_col0 > 10)) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 1 dataSize: 201 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 201 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -417,7 +459,88 @@ POSTHOOK: query: EXPLAIN EXTENDED WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (AND (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 10) (< (. (TOK_TABLE_OR_COL a) key) 20)) (> (. (TOK_TABLE_OR_COL b) key) 15)) (< (. (TOK_TABLE_OR_COL b) key) 25)) (= (. (TOK_TABLE_OR_COL b) ds) '2008-04-08'))))) + +TOK_QUERY + TOK_FROM + TOK_FULLOUTERJOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_TABREF + TOK_TABNAME + srcpart + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + = + . + TOK_TABLE_OR_COL + b + ds + '2008-04-08' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -426,53 +549,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + alias: b + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string), value (type: string), ds (type: string) TableScan - alias: b - Statistics: - numRows: 116 dataSize: 23248 basicStatsState: COMPLETE colStatsState: NONE + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 116 dataSize: 23248 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string), value (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -700,37 +799,22 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col6 - Statistics: - numRows: 127 dataSize: 25572 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 127 Data size: 25572 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: (((((_col4 > 15) and (_col4 < 25)) and (_col6 = '2008-04-08')) and (_col0 > 10)) and (_col0 < 20)) - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + predicate: (((((_col4 > 15) and (_col4 < 25)) and (_col6 = '2008-04-08')) and (_col0 > 10)) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/parallel.q.out ql/src/test/results/clientpositive/parallel.q.out index 8d0ea22..d97d1d1 100644 --- ql/src/test/results/clientpositive/parallel.q.out +++ ql/src/test/results/clientpositive/parallel.q.out @@ -18,9 +18,6 @@ from (select key, value from src group by key, value) s insert overwrite table src_a select s.key, s.value group by s.key, s.value insert overwrite table src_b select s.key, s.value group by s.key, s.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_a))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) value))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) key) (. (TOK_TABLE_OR_COL s) value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_b))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) value))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) key) (. (TOK_TABLE_OR_COL s) value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -32,59 +29,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - s:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -92,67 +66,45 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_a Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/parallel_orderby.q.out ql/src/test/results/clientpositive/parallel_orderby.q.out index edc60cd..e1a7e53 100644 --- ql/src/test/results/clientpositive/parallel_orderby.q.out +++ ql/src/test/results/clientpositive/parallel_orderby.q.out @@ -21,9 +21,6 @@ PREHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: query: explain create table total_ordered as select * from src5 order by key, value POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME total_ordered) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src5))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -33,36 +30,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src5 + Map Operator Tree: TableScan alias: src5 + Statistics: Num rows: 2 Data size: 560 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 560 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - Sampling: SAMPLING_ON_START + Statistics: Num rows: 2 Data size: 560 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 2 Data size: 560 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 560 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -79,12 +65,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: key string, value string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: total_ordered - isExternal: false Stage: Stage-2 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/partition_wise_fileformat2.q.out ql/src/test/results/clientpositive/partition_wise_fileformat2.q.out index c92c496..edf1a1f 100644 --- ql/src/test/results/clientpositive/partition_wise_fileformat2.q.out +++ ql/src/test/results/clientpositive/partition_wise_fileformat2.q.out @@ -71,9 +71,6 @@ POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME partition_test_partitioned))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (and (>= (TOK_TABLE_OR_COL dt) 100) (<= (TOK_TABLE_OR_COL dt) 102))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -81,24 +78,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - partition_test_partitioned + Map Operator Tree: TableScan alias: partition_test_partitioned + Statistics: Num rows: 75 Data size: 523 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: dt - type: string - expr: BLOCK__OFFSET__INSIDE__FILE - type: bigint + expressions: key (type: string), value (type: string), dt (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 75 Data size: 523 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 75 Data size: 523 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -213,9 +203,6 @@ POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME partition_test_partitioned))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (and (>= (TOK_TABLE_OR_COL dt) 100) (<= (TOK_TABLE_OR_COL dt) 102))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -226,17 +213,11 @@ STAGE PLANS: Processor Tree: TableScan alias: partition_test_partitioned + Statistics: Num rows: 75 Data size: 523 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: dt - type: string - expr: BLOCK__OFFSET__INSIDE__FILE - type: bigint + expressions: key (type: string), value (type: string), dt (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 75 Data size: 523 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select * from partition_test_partitioned where dt >=100 and dt <= 102 diff --git ql/src/test/results/clientpositive/pcr.q.out ql/src/test/results/clientpositive/pcr.q.out index 50f3bf5..7f216d9 100644 --- ql/src/test/results/clientpositive/pcr.q.out +++ ql/src/test/results/clientpositive/pcr.q.out @@ -62,7 +62,44 @@ POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldS POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME pcr_t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (and (<= (TOK_TABLE_OR_COL ds) '2000-04-09') (< (TOK_TABLE_OR_COL key) 5))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + and + <= + TOK_TABLE_OR_COL + ds + '2000-04-09' + < + TOK_TABLE_OR_COL + key + 5 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -71,48 +108,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - pcr_t1 + Map Operator Tree: TableScan alias: pcr_t1 - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 5) - type: boolean - Statistics: - numRows: 13 dataSize: 104 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 5) (type: boolean) + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 13 dataSize: 104 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col2 - type: string + key expressions: _col0 (type: int), _col2 (type: string) sort order: ++ - Statistics: - numRows: 13 dataSize: 104 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -204,15 +218,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 13 dataSize: 104 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 13 dataSize: 104 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -272,7 +284,38 @@ POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldS POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME pcr_t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (or (<= (TOK_TABLE_OR_COL ds) '2000-04-09') (< (TOK_TABLE_OR_COL key) 5))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + or + <= + TOK_TABLE_OR_COL + ds + '2000-04-09' + < + TOK_TABLE_OR_COL + key + 5 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -281,42 +324,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - pcr_t1 + Map Operator Tree: TableScan alias: pcr_t1 - Statistics: - numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((ds <= '2000-04-09') or (key < 5)) - type: boolean - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((ds <= '2000-04-09') or (key < 5)) (type: boolean) + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + value expressions: _col0 (type: int), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -450,15 +476,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -555,7 +579,49 @@ POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldS POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME pcr_t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (and (and (<= (TOK_TABLE_OR_COL ds) '2000-04-09') (< (TOK_TABLE_OR_COL key) 5)) (!= (TOK_TABLE_OR_COL value) 'val_2'))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + and + and + <= + TOK_TABLE_OR_COL + ds + '2000-04-09' + < + TOK_TABLE_OR_COL + key + 5 + != + TOK_TABLE_OR_COL + value + 'val_2' + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -564,48 +630,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - pcr_t1 + Map Operator Tree: TableScan alias: pcr_t1 - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key < 5) and (value <> 'val_2')) - type: boolean - Statistics: - numRows: 13 dataSize: 104 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key < 5) and (value <> 'val_2')) (type: boolean) + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 13 dataSize: 104 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col2 - type: string + key expressions: _col0 (type: int), _col2 (type: string) sort order: ++ - Statistics: - numRows: 13 dataSize: 104 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -697,15 +740,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 13 dataSize: 104 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 13 dataSize: 104 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -767,7 +808,54 @@ POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldS POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME pcr_t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (or (and (< (TOK_TABLE_OR_COL ds) '2000-04-09') (< (TOK_TABLE_OR_COL key) 5)) (and (> (TOK_TABLE_OR_COL ds) '2000-04-09') (== (TOK_TABLE_OR_COL value) 'val_5')))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + or + and + < + TOK_TABLE_OR_COL + ds + '2000-04-09' + < + TOK_TABLE_OR_COL + key + 5 + and + > + TOK_TABLE_OR_COL + ds + '2000-04-09' + == + TOK_TABLE_OR_COL + value + 'val_5' + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -776,48 +864,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - pcr_t1 + Map Operator Tree: TableScan alias: pcr_t1 - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (((ds < '2000-04-09') and (key < 5)) or ((ds > '2000-04-09') and (value = 'val_5'))) - type: boolean - Statistics: - numRows: 10 dataSize: 80 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((ds < '2000-04-09') and (key < 5)) or ((ds > '2000-04-09') and (value = 'val_5'))) (type: boolean) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 10 dataSize: 80 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col2 - type: string + key expressions: _col0 (type: int), _col2 (type: string) sort order: ++ - Statistics: - numRows: 10 dataSize: 80 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -909,15 +974,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 10 dataSize: 80 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 10 dataSize: 80 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -981,7 +1044,54 @@ POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldS POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME pcr_t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (or (and (< (TOK_TABLE_OR_COL ds) '2000-04-10') (< (TOK_TABLE_OR_COL key) 5)) (and (> (TOK_TABLE_OR_COL ds) '2000-04-08') (== (TOK_TABLE_OR_COL value) 'val_5')))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + or + and + < + TOK_TABLE_OR_COL + ds + '2000-04-10' + < + TOK_TABLE_OR_COL + key + 5 + and + > + TOK_TABLE_OR_COL + ds + '2000-04-08' + == + TOK_TABLE_OR_COL + value + 'val_5' + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -990,48 +1100,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - pcr_t1 + Map Operator Tree: TableScan alias: pcr_t1 - Statistics: - numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (((ds < '2000-04-10') and (key < 5)) or ((ds > '2000-04-08') and (value = 'val_5'))) - type: boolean - Statistics: - numRows: 16 dataSize: 128 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((ds < '2000-04-10') and (key < 5)) or ((ds > '2000-04-08') and (value = 'val_5'))) (type: boolean) + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 16 dataSize: 128 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col2 - type: string + key expressions: _col0 (type: int), _col2 (type: string) sort order: ++ - Statistics: - numRows: 16 dataSize: 128 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1165,15 +1252,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 16 dataSize: 128 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 16 dataSize: 128 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1247,7 +1332,54 @@ POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldS POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME pcr_t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (and (or (< (TOK_TABLE_OR_COL ds) '2000-04-10') (< (TOK_TABLE_OR_COL key) 5)) (or (> (TOK_TABLE_OR_COL ds) '2000-04-08') (== (TOK_TABLE_OR_COL value) 'val_5')))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + and + or + < + TOK_TABLE_OR_COL + ds + '2000-04-10' + < + TOK_TABLE_OR_COL + key + 5 + or + > + TOK_TABLE_OR_COL + ds + '2000-04-08' + == + TOK_TABLE_OR_COL + value + 'val_5' + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1256,48 +1388,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - pcr_t1 + Map Operator Tree: TableScan alias: pcr_t1 - Statistics: - numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (((ds < '2000-04-10') or (key < 5)) and ((ds > '2000-04-08') or (value = 'val_5'))) - type: boolean - Statistics: - numRows: 33 dataSize: 264 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((ds < '2000-04-10') or (key < 5)) and ((ds > '2000-04-08') or (value = 'val_5'))) (type: boolean) + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 33 dataSize: 264 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col2 - type: string + key expressions: _col0 (type: int), _col2 (type: string) sort order: ++ - Statistics: - numRows: 33 dataSize: 264 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1431,15 +1540,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 33 dataSize: 264 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 33 dataSize: 264 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1521,7 +1628,46 @@ POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldS POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME pcr_t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (or (= (TOK_TABLE_OR_COL ds) '2000-04-08') (= (TOK_TABLE_OR_COL ds) '2000-04-09')) (= (TOK_TABLE_OR_COL key) 14))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + and + or + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + = + TOK_TABLE_OR_COL + ds + '2000-04-09' + = + TOK_TABLE_OR_COL + key + 14 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1530,44 +1676,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - pcr_t1 + Map Operator Tree: TableScan alias: pcr_t1 - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key = 14) - type: boolean - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key = 14) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + value expressions: _col0 (type: int), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1659,15 +1786,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1717,7 +1842,41 @@ POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldS POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME pcr_t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (or (= (TOK_TABLE_OR_COL ds) '2000-04-08') (= (TOK_TABLE_OR_COL ds) '2000-04-09'))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + or + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + = + TOK_TABLE_OR_COL + ds + '2000-04-09' + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1726,37 +1885,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - pcr_t1 + Map Operator Tree: TableScan alias: pcr_t1 - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + value expressions: _col0 (type: int), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1848,15 +1991,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1946,7 +2087,41 @@ POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldS POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME pcr_t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (or (>= (TOK_TABLE_OR_COL ds) '2000-04-08') (< (TOK_TABLE_OR_COL ds) '2000-04-10'))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + or + >= + TOK_TABLE_OR_COL + ds + '2000-04-08' + < + TOK_TABLE_OR_COL + ds + '2000-04-10' + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1955,37 +2130,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - pcr_t1 + Map Operator Tree: TableScan alias: pcr_t1 - Statistics: - numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: - numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + value expressions: _col0 (type: int), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2119,15 +2278,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2239,7 +2396,57 @@ POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldS POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME pcr_t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (or (and (= (TOK_TABLE_OR_COL ds) '2000-04-08') (= (TOK_TABLE_OR_COL key) 1)) (and (= (TOK_TABLE_OR_COL ds) '2000-04-09') (= (TOK_TABLE_OR_COL key) 2)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + or + and + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + = + TOK_TABLE_OR_COL + key + 1 + and + = + TOK_TABLE_OR_COL + ds + '2000-04-09' + = + TOK_TABLE_OR_COL + key + 2 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2248,50 +2455,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - pcr_t1 + Map Operator Tree: TableScan alias: pcr_t1 - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) - type: boolean - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean) + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) sort order: +++ - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2383,15 +2565,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2442,7 +2622,55 @@ POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldS POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME pcr_t1) t1) (TOK_TABREF (TOK_TABNAME pcr_t1) t2) (and (and (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key)) (= (. (TOK_TABLE_OR_COL t1) ds) '2000-04-08')) (= (. (TOK_TABLE_OR_COL t2) ds) '2000-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL t1) key))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + pcr_t1 + t1 + TOK_TABREF + TOK_TABNAME + pcr_t1 + t2 + and + and + = + . + TOK_TABLE_OR_COL + t1 + key + . + TOK_TABLE_OR_COL + t2 + key + = + . + TOK_TABLE_OR_COL + t1 + ds + '2000-04-08' + = + . + TOK_TABLE_OR_COL + t2 + ds + '2000-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + t1 + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2452,55 +2680,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan - alias: t1 - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE + alias: t2 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - t2 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: int), value (type: string), ds (type: string) TableScan - alias: t2 - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE + alias: t1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: int), value (type: string), ds (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2555,27 +2757,12 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7 - Statistics: - numRows: 22 dataSize: 176 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: int - expr: _col6 - type: string - expr: _col7 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: - numRows: 22 dataSize: 176 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -2596,31 +2783,15 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Statistics: - numRows: 22 dataSize: 176 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: int - expr: _col4 - type: string - expr: _col5 - type: string + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2649,15 +2820,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 22 dataSize: 176 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 22 dataSize: 176 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2743,7 +2912,55 @@ POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldS POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME pcr_t1) t1) (TOK_TABREF (TOK_TABNAME pcr_t1) t2) (and (and (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key)) (= (. (TOK_TABLE_OR_COL t1) ds) '2000-04-08')) (= (. (TOK_TABLE_OR_COL t2) ds) '2000-04-09')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL t1) key))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + pcr_t1 + t1 + TOK_TABREF + TOK_TABNAME + pcr_t1 + t2 + and + and + = + . + TOK_TABLE_OR_COL + t1 + key + . + TOK_TABLE_OR_COL + t2 + key + = + . + TOK_TABLE_OR_COL + t1 + ds + '2000-04-08' + = + . + TOK_TABLE_OR_COL + t2 + ds + '2000-04-09' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + t1 + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2753,55 +2970,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan - alias: t1 - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE + alias: t2 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - t2 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: int), value (type: string), ds (type: string) TableScan - alias: t2 - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE + alias: t1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: int), value (type: string), ds (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2898,27 +3089,12 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7 - Statistics: - numRows: 22 dataSize: 176 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: int - expr: _col6 - type: string - expr: _col7 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: - numRows: 22 dataSize: 176 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -2939,31 +3115,15 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Statistics: - numRows: 22 dataSize: 176 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: int - expr: _col4 - type: string - expr: _col5 - type: string + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2992,15 +3152,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 22 dataSize: 176 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 22 dataSize: 176 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3106,7 +3264,62 @@ POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldS POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-11).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME pcr_t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (or (and (> (TOK_TABLE_OR_COL ds) '2000-04-08') (< (TOK_TABLE_OR_COL ds) '2000-04-11')) (and (and (>= (TOK_TABLE_OR_COL ds) '2000-04-08') (<= (TOK_TABLE_OR_COL ds) '2000-04-11')) (= (TOK_TABLE_OR_COL key) 2)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + or + and + > + TOK_TABLE_OR_COL + ds + '2000-04-08' + < + TOK_TABLE_OR_COL + ds + '2000-04-11' + and + and + >= + TOK_TABLE_OR_COL + ds + '2000-04-08' + <= + TOK_TABLE_OR_COL + ds + '2000-04-11' + = + TOK_TABLE_OR_COL + key + 2 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -3115,50 +3328,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - pcr_t1 + Map Operator Tree: TableScan alias: pcr_t1 - Statistics: - numRows: 80 dataSize: 640 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 80 Data size: 640 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (((ds > '2000-04-08') and (ds < '2000-04-11')) or (key = 2)) - type: boolean - Statistics: - numRows: 48 dataSize: 384 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((ds > '2000-04-08') and (ds < '2000-04-11')) or (key = 2)) (type: boolean) + Statistics: Num rows: 48 Data size: 384 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 48 dataSize: 384 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 48 Data size: 384 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) sort order: +++ - Statistics: - numRows: 48 dataSize: 384 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 48 Data size: 384 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3334,15 +3522,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 48 dataSize: 384 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 48 Data size: 384 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 48 dataSize: 384 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 48 Data size: 384 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3442,7 +3628,57 @@ POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldS POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-11).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME pcr_t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (or (and (> (TOK_TABLE_OR_COL ds) '2000-04-08') (< (TOK_TABLE_OR_COL ds) '2000-04-11')) (and (<= (TOK_TABLE_OR_COL ds) '2000-04-09') (= (TOK_TABLE_OR_COL key) 2)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + or + and + > + TOK_TABLE_OR_COL + ds + '2000-04-08' + < + TOK_TABLE_OR_COL + ds + '2000-04-11' + and + <= + TOK_TABLE_OR_COL + ds + '2000-04-09' + = + TOK_TABLE_OR_COL + key + 2 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -3451,50 +3687,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - pcr_t1 + Map Operator Tree: TableScan alias: pcr_t1 - Statistics: - numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((ds > '2000-04-08') or ((ds <= '2000-04-09') and (key = 2))) - type: boolean - Statistics: - numRows: 30 dataSize: 240 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((ds > '2000-04-08') or ((ds <= '2000-04-09') and (key = 2))) (type: boolean) + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 30 dataSize: 240 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) sort order: +++ - Statistics: - numRows: 30 dataSize: 240 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3628,15 +3839,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 30 dataSize: 240 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 30 dataSize: 240 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3765,7 +3974,47 @@ POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldS POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-11).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME pcr_t1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME pcr_t2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2000-04-08'))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME pcr_t3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2000-04-08')))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + pcr_t2 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + pcr_t3 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + STAGE DEPENDENCIES: Stage-2 is a root stage @@ -3787,29 +4036,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - pcr_t1 + Map Operator Tree: TableScan alias: pcr_t1 - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3830,21 +4071,15 @@ STAGE PLANS: GatherStats: true MultiFileSpray: false Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 2 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3947,8 +4182,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -4014,8 +4248,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -4121,8 +4354,7 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -4188,8 +4420,7 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -4310,7 +4541,57 @@ POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, t POSTHOOK: Lineage: pcr_t3.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: pcr_t3.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME pcr_t1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME pcr_t2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '2000-04-08') (= (TOK_TABLE_OR_COL key) 2)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME pcr_t3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '2000-04-08') (= (TOK_TABLE_OR_COL key) 3))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + pcr_t2 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + and + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + = + TOK_TABLE_OR_COL + key + 2 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + pcr_t3 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + and + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + = + TOK_TABLE_OR_COL + key + 3 + STAGE DEPENDENCIES: Stage-2 is a root stage @@ -4332,36 +4613,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - pcr_t1 + Map Operator Tree: TableScan alias: pcr_t1 - Statistics: - numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key = 2) - type: boolean - Statistics: - numRows: 10 dataSize: 80 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key = 2) (type: boolean) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 10 dataSize: 80 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 10 dataSize: 80 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4388,27 +4658,18 @@ STAGE PLANS: MultiFileSpray: false Filter Operator isSamplingPred: false - predicate: - expr: (key = 3) - type: boolean - Statistics: - numRows: 10 dataSize: 80 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key = 3) (type: boolean) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 10 dataSize: 80 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 2 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 10 dataSize: 80 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4521,8 +4782,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -4603,8 +4863,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -4730,8 +4989,7 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -4812,8 +5070,7 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -4951,7 +5208,40 @@ POSTHOOK: Lineage: pcr_t3.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type: POSTHOOK: Lineage: pcr_t3.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: pcr_t3.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) 11))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + and + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + = + TOK_TABLE_OR_COL + hr + 11 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_LIMIT + 10 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -4960,35 +5250,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + value expressions: _col0 (type: string), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5039,18 +5315,16 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit - Statistics: - numRows: 10 dataSize: 2000 basicStatsState: COMPLETE colStatsState: NONE + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 10 dataSize: 2000 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -5116,7 +5390,60 @@ POSTHOOK: Lineage: pcr_t3.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type: POSTHOOK: Lineage: pcr_t3.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: pcr_t3.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '2008-04-08') (or (= (TOK_TABLE_OR_COL hr) '11') (= (TOK_TABLE_OR_COL hr) '12'))) (= (TOK_TABLE_OR_COL key) 11))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL hr))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_SELEXPR + TOK_TABLE_OR_COL + hr + TOK_WHERE + and + and + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + or + = + TOK_TABLE_OR_COL + hr + '11' + = + TOK_TABLE_OR_COL + hr + '12' + = + TOK_TABLE_OR_COL + key + 11 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + hr + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -5125,54 +5452,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key = 11) - type: boolean - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key = 11) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) sort order: +++ - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5266,15 +5564,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -5346,7 +5642,50 @@ POSTHOOK: Lineage: pcr_t3.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type: POSTHOOK: Lineage: pcr_t3.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: pcr_t3.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL hr) '11') (= (TOK_TABLE_OR_COL key) 11))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL hr))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_SELEXPR + TOK_TABLE_OR_COL + hr + TOK_WHERE + and + = + TOK_TABLE_OR_COL + hr + '11' + = + TOK_TABLE_OR_COL + key + 11 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + hr + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -5355,54 +5694,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key = 11) - type: boolean - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key = 11) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) sort order: +++ - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5496,15 +5806,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/plan_json.q.out ql/src/test/results/clientpositive/plan_json.q.out index e54a73c..e9fba62 100644 --- ql/src/test/results/clientpositive/plan_json.q.out +++ ql/src/test/results/clientpositive/plan_json.q.out @@ -6,4 +6,4 @@ POSTHOOK: query: -- explain plan json: the query gets the formatted json output EXPLAIN FORMATTED SELECT count(1) FROM src POSTHOOK: type: QUERY -{"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Vectorized execution:":"false","Map:":{"Vectorized execution:":"false","Split Sample:":{},"Alias -> Map Operator Tree:":{"src":{"TS_0":{"SEL_1":{"GBY_2":{"RS_3":{"Reduce Output Operator":{"Vectorized execution:":"false","Map-reduce partition columns:":[],"sort order:":"","tag:":"-1","value expressions:":[{"type:":"bigint","expr:":"_col0"}],"key expressions:":[]}}}}}}}},"Reduce:":{"Vectorized execution:":"false","Reduce Operator Tree:":{"GBY_4":{"SEL_5":{"FS_6":{"File Output Operator":{"Vectorized execution:":"false","GlobalTableId:":"0","compressed:":"false","table:":{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1"}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"ROOT STAGE":"TRUE"}},"ABSTRACT SYNTAX TREE":"(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)))))"} +{"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Reduce Operator Tree:":{"Group By Operator":{"mode:":"mergepartial","aggregations:":["count(VALUE._col0)"],"outputColumnNames:":["_col0"],"children":{"Select Operator":{"expressions:":"_col0 (type: bigint)","outputColumnNames:":["_col0"],"children":{"File Output Operator":{"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","compressed:":"false","table:":{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE"}},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE"}},"Map Operator Tree:":[{"TableScan":{"alias:":"src","children":{"Select Operator":{"children":{"Group By Operator":{"mode:":"hash","aggregations:":["count(1)"],"outputColumnNames:":["_col0"],"children":{"Reduce Output Operator":{"sort order:":"","value expressions:":"_col0 (type: bigint)","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE"}},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE"}},"Statistics:":"Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE"}},"Statistics:":"Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE"}}]}},"Stage-0":{"Fetch Operator":{"limit:":"-1"}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"ROOT STAGE":"TRUE"}}} diff --git ql/src/test/results/clientpositive/ppd1.q.out ql/src/test/results/clientpositive/ppd1.q.out index af5e6c4..2031787 100644 --- ql/src/test/results/clientpositive/ppd1.q.out +++ ql/src/test/results/clientpositive/ppd1.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT src.key as c3 from src where src.key > '2' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c3)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) '2')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,22 +11,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '2') - type: boolean + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -434,9 +429,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT src.key as c3 from src where src.key > '2' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c3)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) '2')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -444,22 +436,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '2') - type: boolean + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd2.q.out ql/src/test/results/clientpositive/ppd2.q.out index f6af8f8..2f2c558 100644 --- ql/src/test/results/clientpositive/ppd2.q.out +++ ql/src/test/results/clientpositive/ppd2.q.out @@ -26,9 +26,6 @@ from ( sort by a.key,a.cc desc) b where b.cc>1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL value)) cc)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08') (= (. (TOK_TABLE_OR_COL a) hr) '11'))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))) (TOK_DISTRIBUTEBY (. (TOK_TABLE_OR_COL a) key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEDESC (. (TOK_TABLE_OR_COL a) cc))))) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) cc))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL b) cc) 1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -36,71 +33,47 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b:a:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: +- - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col1 > 1) - type: boolean + predicate: (_col1 > 1) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -302,9 +275,6 @@ FROM ( ) sub WHERE sub.tag_student > 0 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL key)) user_id) (TOK_SELEXPR (TOK_FUNCTION WHEN (OR (LIKE (TOK_TABLE_OR_COL value) 'aaa%') (LIKE (TOK_TABLE_OR_COL value) 'vvv%')) 1 0) tag_student)))) sub)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL user_id))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL sub) tag_student) 0)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -312,29 +282,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - sub:srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: CASE WHEN (((value like 'aaa%') or (value like 'vvv%'))) THEN (1) ELSE (0) END - type: int + expressions: UDFToInteger(key) (type: int), CASE WHEN (((value like 'aaa%') or (value like 'vvv%'))) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col1 > 0) - type: boolean + predicate: (_col1 > 0) (type: boolean) + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -350,9 +315,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.key, x.value as v1, y.key FROM SRC x JOIN SRC y ON (x.key = y.key) where x.key = 20 CLUSTER BY v1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME SRC) x) (TOK_TABREF (TOK_TABNAME SRC) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) v1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL v1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -361,47 +323,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: y + alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -409,20 +355,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -430,30 +370,20 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -491,9 +421,6 @@ from ( sort by a.key,a.cc desc) b where b.cc>1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL value)) cc)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08') (= (. (TOK_TABLE_OR_COL a) hr) '11'))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))) (TOK_DISTRIBUTEBY (. (TOK_TABLE_OR_COL a) key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEDESC (. (TOK_TABLE_OR_COL a) cc))))) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) cc))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL b) cc) 1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -501,75 +428,50 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b:a:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((ds = '2008-04-08') and (hr = '11')) - type: boolean + predicate: ((ds = '2008-04-08') and (hr = '11')) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(value) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(value) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: +- - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col1 > 1) - type: boolean + predicate: (_col1 > 1) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_clusterby.q.out ql/src/test/results/clientpositive/ppd_clusterby.q.out index cfc63ea..cc08731 100644 --- ql/src/test/results/clientpositive/ppd_clusterby.q.out +++ ql/src/test/results/clientpositive/ppd_clusterby.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM SRC x where x.key = 10 CLUSTER BY x.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 10)) (TOK_CLUSTERBY (. (TOK_TABLE_OR_COL x) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,40 +11,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 10) - type: boolean + predicate: (key = 10) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -72,9 +58,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.key, x.value as v1, y.key FROM SRC x JOIN SRC y ON (x.key = y.key) where x.key = 20 CLUSTER BY v1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME SRC) x) (TOK_TABREF (TOK_TABNAME SRC) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) v1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL v1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -83,47 +66,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: y + alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -131,24 +98,17 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 = 20) - type: boolean + predicate: (_col0 = 20) (type: boolean) + Statistics: Num rows: 15 Data size: 1546 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 1546 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -156,30 +116,20 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 15 Data size: 1546 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 15 Data size: 1546 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 15 Data size: 1546 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -204,9 +154,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM SRC x where x.key = 10 CLUSTER BY x.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 10)) (TOK_CLUSTERBY (. (TOK_TABLE_OR_COL x) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -214,40 +161,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 10) - type: boolean + predicate: (key = 10) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -272,9 +208,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.key, x.value as v1, y.key FROM SRC x JOIN SRC y ON (x.key = y.key) where x.key = 20 CLUSTER BY v1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME SRC) x) (TOK_TABREF (TOK_TABNAME SRC) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) v1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL v1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -283,47 +216,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan - alias: x + alias: y + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - y + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: y + alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 20) - type: boolean + predicate: (key = 20) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -331,20 +248,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -352,30 +263,20 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_constant_expr.q.out ql/src/test/results/clientpositive/ppd_constant_expr.q.out index 616e5ac..5b0f3c2 100644 --- ql/src/test/results/clientpositive/ppd_constant_expr.q.out +++ ql/src/test/results/clientpositive/ppd_constant_expr.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE ppd_constant_expr SELECT 4 + NULL, src1.key - NULL, NULL + NULL POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME ppd_constant_expr))) (TOK_SELECT (TOK_SELEXPR (+ 4 TOK_NULL)) (TOK_SELEXPR (- (. (TOK_TABLE_OR_COL src1) key) TOK_NULL)) (TOK_SELEXPR (+ TOK_NULL TOK_NULL))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -27,22 +24,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1 + Map Operator Tree: TableScan alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (4 + null) - type: double - expr: UDFToInteger((key - null)) - type: int - expr: (null + null) - type: double + expressions: (4 + null) (type: double), UDFToInteger((key - null)) (type: int), (null + null) (type: double) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -73,12 +65,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -87,12 +77,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -165,9 +153,6 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: ppd_constant_expr.c1 EXPRESSION [] POSTHOOK: Lineage: ppd_constant_expr.c2 EXPRESSION [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: ppd_constant_expr.c3 EXPRESSION [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME ppd_constant_expr))) (TOK_SELECT (TOK_SELEXPR (+ 4 TOK_NULL)) (TOK_SELEXPR (- (. (TOK_TABLE_OR_COL src1) key) TOK_NULL)) (TOK_SELEXPR (+ TOK_NULL TOK_NULL))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -181,22 +166,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1 + Map Operator Tree: TableScan alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (4 + null) - type: double - expr: UDFToInteger((key - null)) - type: int - expr: (null + null) - type: double + expressions: (4 + null) (type: double), UDFToInteger((key - null)) (type: int), (null + null) (type: double) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -227,12 +207,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -241,12 +219,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_constant_where.q.out ql/src/test/results/clientpositive/ppd_constant_where.q.out index 524ed5a..b1d5d6d 100644 --- ql/src/test/results/clientpositive/ppd_constant_where.q.out +++ ql/src/test/results/clientpositive/ppd_constant_where.q.out @@ -6,9 +6,6 @@ POSTHOOK: query: -- Test that the partition pruner does not fail when there is a EXPLAIN SELECT COUNT(*) FROM srcpart WHERE ds = '2008-04-08' and 'a' = 'a' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= 'a' 'a'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -16,42 +13,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Filter Operator - predicate: - expr: ('a' = 'a') - type: boolean + predicate: ('a' = 'a') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_gby.q.out ql/src/test/results/clientpositive/ppd_gby.q.out index 5908450..68092e0 100644 --- ql/src/test/results/clientpositive/ppd_gby.q.out +++ ql/src/test/results/clientpositive/ppd_gby.q.out @@ -10,9 +10,6 @@ FROM (SELECT src.value as c1, count(src.key) as c2 from src where src.value > 'val_10' group by src.value) src1 WHERE src1.c1 > 'val_200' and (src1.c2 > 30 or src1.c1 < 'val_400') POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c1) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) key)) c2)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) value) 'val_10')) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) value)))) src1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) c1))) (TOK_WHERE (and (> (. (TOK_TABLE_OR_COL src1) c1) 'val_200') (or (> (. (TOK_TABLE_OR_COL src1) c2) 30) (< (. (TOK_TABLE_OR_COL src1) c1) 'val_400')))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -20,71 +17,50 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((value > 'val_10') and (value > 'val_200')) - type: boolean + predicate: ((value > 'val_10') and (value > 'val_200')) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(key) - bucketGroup: false - keys: - expr: value - type: string + aggregations: count(key) + keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((_col0 > 'val_200') and ((_col1 > 30) or (_col0 < 'val_400'))) - type: boolean + predicate: ((_col0 > 'val_200') and ((_col1 > 30) or (_col0 < 'val_400'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -249,9 +225,6 @@ FROM (SELECT src.value as c1, count(src.key) as c2 from src where src.value > 'val_10' group by src.value) src1 WHERE src1.c1 > 'val_200' and (src1.c2 > 30 or src1.c1 < 'val_400') POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c1) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) key)) c2)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) value) 'val_10')) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) value)))) src1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) c1))) (TOK_WHERE (and (> (. (TOK_TABLE_OR_COL src1) c1) 'val_200') (or (> (. (TOK_TABLE_OR_COL src1) c2) 30) (< (. (TOK_TABLE_OR_COL src1) c1) 'val_400')))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -259,64 +232,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((value > 'val_10') and (value > 'val_200')) - type: boolean + predicate: ((value > 'val_10') and (value > 'val_200')) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(key) - bucketGroup: false - keys: - expr: value - type: string + aggregations: count(key) + keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((_col0 > 'val_200') and ((_col1 > 30) or (_col0 < 'val_400'))) - type: boolean + predicate: ((_col0 > 'val_200') and ((_col1 > 30) or (_col0 < 'val_400'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_gby2.q.out ql/src/test/results/clientpositive/ppd_gby2.q.out index bdd7e89..a8ccace 100644 --- ql/src/test/results/clientpositive/ppd_gby2.q.out +++ ql/src/test/results/clientpositive/ppd_gby2.q.out @@ -12,9 +12,6 @@ FROM WHERE src1.c1 > 'val_200' AND (src1.c2 > 30 OR src1.c1 < 'val_400') GROUP BY src1.c2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c1) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) key)) c2)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) value) 'val_10')) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) value)))) src1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION max (. (TOK_TABLE_OR_COL src1) c1))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) c2))) (TOK_WHERE (AND (> (. (TOK_TABLE_OR_COL src1) c1) 'val_200') (OR (> (. (TOK_TABLE_OR_COL src1) c2) 30) (< (. (TOK_TABLE_OR_COL src1) c1) 'val_400')))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src1) c2)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -23,82 +20,55 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((value > 'val_10') and (value > 'val_200')) - type: boolean + predicate: ((value > 'val_10') and (value > 'val_200')) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(key) - bucketGroup: false - keys: - expr: value - type: string + aggregations: count(key) + keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((_col0 > 'val_200') and ((_col1 > 30) or (_col0 < 'val_400'))) - type: boolean + predicate: ((_col0 > 'val_200') and ((_col1 > 30) or (_col0 < 'val_400'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: max(_col0) - bucketGroup: false - keys: - expr: _col1 - type: bigint + aggregations: max(_col0) + keys: _col1 (type: bigint) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -106,41 +76,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: bigint + key expressions: _col0 (type: bigint) sort order: + - Map-reduce partition columns: - expr: _col0 - type: bigint - tag: -1 - value expressions: - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: bigint + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col0 - type: bigint + expressions: _col1 (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -185,9 +142,6 @@ FROM WHERE src1.c1 > 'val_200' AND (src1.c2 > 30 OR src1.c1 < 'val_400') GROUP BY src1.c2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c1) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) key)) c2)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) value) 'val_10')) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) value)))) src1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION max (. (TOK_TABLE_OR_COL src1) c1))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) c2))) (TOK_WHERE (AND (> (. (TOK_TABLE_OR_COL src1) c1) 'val_200') (OR (> (. (TOK_TABLE_OR_COL src1) c2) 30) (< (. (TOK_TABLE_OR_COL src1) c1) 'val_400')))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src1) c2)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -196,75 +150,51 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((value > 'val_10') and (value > 'val_200')) - type: boolean + predicate: ((value > 'val_10') and (value > 'val_200')) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(key) - bucketGroup: false - keys: - expr: value - type: string + aggregations: count(key) + keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((_col0 > 'val_200') and ((_col1 > 30) or (_col0 < 'val_400'))) - type: boolean + predicate: ((_col0 > 'val_200') and ((_col1 > 30) or (_col0 < 'val_400'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: max(_col0) - bucketGroup: false - keys: - expr: _col1 - type: bigint + aggregations: max(_col0) + keys: _col1 (type: bigint) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -272,41 +202,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: bigint + key expressions: _col0 (type: bigint) sort order: + - Map-reduce partition columns: - expr: _col0 - type: bigint - tag: -1 - value expressions: - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: bigint + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col0 - type: bigint + expressions: _col1 (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_gby_join.q.out ql/src/test/results/clientpositive/ppd_gby_join.q.out index 05cb895..8eb0683 100644 --- ql/src/test/results/clientpositive/ppd_gby_join.q.out +++ ql/src/test/results/clientpositive/ppd_gby_join.q.out @@ -18,9 +18,6 @@ ON src1.c1 = src2.c3 AND src1.c1 < '400' WHERE src1.c1 > '20' AND (src1.c2 < 'val_50' OR src1.c1 > '2') AND (src2.c3 > '50' OR src1.c1 < '50') AND (src2.c3 <> '4') GROUP BY src1.c1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c2)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) '1')))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c4)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) '2')))) src2) (AND (= (. (TOK_TABLE_OR_COL src1) c1) (. (TOK_TABLE_OR_COL src2) c3)) (< (. (TOK_TABLE_OR_COL src1) c1) '400')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) c1)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL src1) c1) '20') (OR (< (. (TOK_TABLE_OR_COL src1) c2) 'val_50') (> (. (TOK_TABLE_OR_COL src1) c1) '2'))) (OR (> (. (TOK_TABLE_OR_COL src2) c3) '50') (< (. (TOK_TABLE_OR_COL src1) c1) '50'))) (<> (. (TOK_TABLE_OR_COL src2) c3) '4'))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src1) c1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -29,67 +26,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) - type: boolean + predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < '400') - type: boolean + predicate: (_col0 < '400') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - src2:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) - type: boolean + predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < '400') - type: boolean + predicate: (_col0 < '400') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -97,29 +72,23 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) - type: boolean + predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -127,41 +96,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -191,9 +147,6 @@ ON src1.c1 = src2.c3 AND src1.c1 < '400' WHERE src1.c1 > '20' AND (src1.c2 < 'val_50' OR src1.c1 > '2') AND (src2.c3 > '50' OR src1.c1 < '50') AND (src2.c3 <> '4') GROUP BY src1.c1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c2)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) '1')))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c4)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) '2')))) src2) (AND (= (. (TOK_TABLE_OR_COL src1) c1) (. (TOK_TABLE_OR_COL src2) c3)) (< (. (TOK_TABLE_OR_COL src1) c1) '400')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) c1)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL src1) c1) '20') (OR (< (. (TOK_TABLE_OR_COL src1) c2) 'val_50') (> (. (TOK_TABLE_OR_COL src1) c1) '2'))) (OR (> (. (TOK_TABLE_OR_COL src2) c3) '50') (< (. (TOK_TABLE_OR_COL src1) c1) '50'))) (<> (. (TOK_TABLE_OR_COL src2) c3) '4'))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src1) c1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -202,59 +155,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) - type: boolean + predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - src2:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) - type: boolean + predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -262,29 +195,23 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) - type: boolean + predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -292,41 +219,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_join.q.out ql/src/test/results/clientpositive/ppd_join.q.out index 6cdf8d9..53a4ee5 100644 --- ql/src/test/results/clientpositive/ppd_join.q.out +++ ql/src/test/results/clientpositive/ppd_join.q.out @@ -16,9 +16,6 @@ JOIN ON src1.c1 = src2.c3 AND src1.c1 < '400' WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4') POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c2)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) '1')))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c4)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) '2')))) src2) (AND (= (. (TOK_TABLE_OR_COL src1) c1) (. (TOK_TABLE_OR_COL src2) c3)) (< (. (TOK_TABLE_OR_COL src1) c1) '400')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) c1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) c4))) (TOK_WHERE (and (and (and (> (. (TOK_TABLE_OR_COL src1) c1) '20') (or (< (. (TOK_TABLE_OR_COL src1) c2) 'val_50') (> (. (TOK_TABLE_OR_COL src1) c1) '2'))) (or (> (. (TOK_TABLE_OR_COL src2) c3) '50') (< (. (TOK_TABLE_OR_COL src1) c1) '50'))) (<> (. (TOK_TABLE_OR_COL src2) c3) '4'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -26,71 +23,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) - type: boolean + predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < '400') - type: boolean + predicate: (_col0 < '400') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - src2:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) - type: boolean + predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < '400') - type: boolean + predicate: (_col0 < '400') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -98,22 +69,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) - type: boolean + predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -579,9 +546,6 @@ JOIN ON src1.c1 = src2.c3 AND src1.c1 < '400' WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4') POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c2)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) '1')))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c4)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) '2')))) src2) (AND (= (. (TOK_TABLE_OR_COL src1) c1) (. (TOK_TABLE_OR_COL src2) c3)) (< (. (TOK_TABLE_OR_COL src1) c1) '400')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) c1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) c4))) (TOK_WHERE (and (and (and (> (. (TOK_TABLE_OR_COL src1) c1) '20') (or (< (. (TOK_TABLE_OR_COL src1) c2) 'val_50') (> (. (TOK_TABLE_OR_COL src1) c1) '2'))) (or (> (. (TOK_TABLE_OR_COL src2) c3) '50') (< (. (TOK_TABLE_OR_COL src1) c1) '50'))) (<> (. (TOK_TABLE_OR_COL src2) c3) '4'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -589,63 +553,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) - type: boolean + predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - src2:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) - type: boolean + predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -653,22 +593,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) - type: boolean + predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_join2.q.out ql/src/test/results/clientpositive/ppd_join2.q.out index caec608..d783338 100644 --- ql/src/test/results/clientpositive/ppd_join2.q.out +++ ql/src/test/results/clientpositive/ppd_join2.q.out @@ -22,9 +22,6 @@ JOIN ON src1.c2 = src3.c6 WHERE src1.c1 <> '311' and (src1.c2 <> 'val_50' or src1.c1 > '1') and (src2.c3 <> '10' or src1.c1 <> '10') and (src2.c3 <> '14') and (sqrt(src3.c5) <> 13) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c2)) (TOK_WHERE (<> (. (TOK_TABLE_OR_COL src) key) '302')))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c4)) (TOK_WHERE (<> (. (TOK_TABLE_OR_COL src) key) '305')))) src2) (AND (= (. (TOK_TABLE_OR_COL src1) c1) (. (TOK_TABLE_OR_COL src2) c3)) (< (. (TOK_TABLE_OR_COL src1) c1) '400'))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c5) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c6)) (TOK_WHERE (<> (. (TOK_TABLE_OR_COL src) key) '306')))) src3) (= (. (TOK_TABLE_OR_COL src1) c2) (. (TOK_TABLE_OR_COL src3) c6)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) c1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) c4))) (TOK_WHERE (and (and (and (and (<> (. (TOK_TABLE_OR_COL src1) c1) '311') (or (<> (. (TOK_TABLE_OR_COL src1) c2) 'val_50') (> (. (TOK_TABLE_OR_COL src1) c1) '1'))) (or (<> (. (TOK_TABLE_OR_COL src2) c3) '10') (<> (. (TOK_TABLE_OR_COL src1) c1) '10'))) (<> (. (TOK_TABLE_OR_COL src2) c3) '14')) (<> (TOK_FUNCTION sqrt (. (TOK_TABLE_OR_COL src3) c5)) 13))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -33,71 +30,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((key <> '302') and (key < '400')) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) - type: boolean + predicate: ((((key <> '305') and (key < '400')) and (key <> '14')) and (key <> '311')) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < '400') - type: boolean + predicate: (_col0 < '400') (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - src2:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((key <> '305') and (key < '400')) and (key <> '14')) and (key <> '311')) - type: boolean + predicate: (((((key <> '302') and (key < '400')) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < '400') - type: boolean + predicate: (_col0 < '400') (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -105,11 +76,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -117,53 +87,30 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - src3:src + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 4 Data size: 881 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key <> '306') and (sqrt(key) <> 13)) - type: boolean + predicate: ((key <> '306') and (sqrt(key) <> 13)) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -171,22 +118,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((_col2 <> '311') and ((_col3 <> 'val_50') or (_col2 > '1'))) and ((_col0 <> '10') or (_col2 <> '10'))) and (_col0 <> '14')) and (sqrt(_col4) <> 13)) - type: boolean + predicate: (((((_col2 <> '311') and ((_col3 <> 'val_50') or (_col2 > '1'))) and ((_col0 <> '10') or (_col2 <> '10'))) and (_col0 <> '14')) and (sqrt(_col4) <> 13)) (type: boolean) + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col1 - type: string + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1764,9 +1707,6 @@ JOIN ON src1.c2 = src3.c6 WHERE src1.c1 <> '311' and (src1.c2 <> 'val_50' or src1.c1 > '1') and (src2.c3 <> '10' or src1.c1 <> '10') and (src2.c3 <> '14') and (sqrt(src3.c5) <> 13) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c2)) (TOK_WHERE (<> (. (TOK_TABLE_OR_COL src) key) '302')))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c4)) (TOK_WHERE (<> (. (TOK_TABLE_OR_COL src) key) '305')))) src2) (AND (= (. (TOK_TABLE_OR_COL src1) c1) (. (TOK_TABLE_OR_COL src2) c3)) (< (. (TOK_TABLE_OR_COL src1) c1) '400'))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c5) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c6)) (TOK_WHERE (<> (. (TOK_TABLE_OR_COL src) key) '306')))) src3) (= (. (TOK_TABLE_OR_COL src1) c2) (. (TOK_TABLE_OR_COL src3) c6)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) c1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) c4))) (TOK_WHERE (and (and (and (and (<> (. (TOK_TABLE_OR_COL src1) c1) '311') (or (<> (. (TOK_TABLE_OR_COL src1) c2) 'val_50') (> (. (TOK_TABLE_OR_COL src1) c1) '1'))) (or (<> (. (TOK_TABLE_OR_COL src2) c3) '10') (<> (. (TOK_TABLE_OR_COL src1) c1) '10'))) (<> (. (TOK_TABLE_OR_COL src2) c3) '14')) (<> (TOK_FUNCTION sqrt (. (TOK_TABLE_OR_COL src3) c5)) 13))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1775,63 +1715,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((key <> '302') and (key < '400')) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) - type: boolean + predicate: ((((key <> '305') and (key < '400')) and (key <> '14')) and (key <> '311')) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - src2:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((key <> '305') and (key < '400')) and (key <> '14')) and (key <> '311')) - type: boolean + predicate: (((((key <> '302') and (key < '400')) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1839,11 +1755,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 2644 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1851,53 +1766,30 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - src3:src + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 13 Data size: 2644 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key <> '306') and (sqrt(key) <> 13)) - type: boolean + predicate: ((key <> '306') and (sqrt(key) <> 13)) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1905,22 +1797,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((_col2 <> '311') and ((_col3 <> 'val_50') or (_col2 > '1'))) and ((_col0 <> '10') or (_col2 <> '10'))) and (_col0 <> '14')) and (sqrt(_col4) <> 13)) - type: boolean + predicate: (((((_col2 <> '311') and ((_col3 <> 'val_50') or (_col2 > '1'))) and ((_col0 <> '10') or (_col2 <> '10'))) and (_col0 <> '14')) and (sqrt(_col4) <> 13)) (type: boolean) + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col1 - type: string + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_join3.q.out ql/src/test/results/clientpositive/ppd_join3.q.out index e2c8c86..70b2f4c 100644 --- ql/src/test/results/clientpositive/ppd_join3.q.out +++ ql/src/test/results/clientpositive/ppd_join3.q.out @@ -22,9 +22,6 @@ JOIN ON src1.c1 = src3.c5 WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1') POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c2)) (TOK_WHERE (<> (. (TOK_TABLE_OR_COL src) key) '11')))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c4)) (TOK_WHERE (<> (. (TOK_TABLE_OR_COL src) key) '12')))) src2) (AND (= (. (TOK_TABLE_OR_COL src1) c1) (. (TOK_TABLE_OR_COL src2) c3)) (< (. (TOK_TABLE_OR_COL src1) c1) '400'))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c5) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c6)) (TOK_WHERE (<> (. (TOK_TABLE_OR_COL src) key) '13')))) src3) (= (. (TOK_TABLE_OR_COL src1) c1) (. (TOK_TABLE_OR_COL src3) c5)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) c1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) c4))) (TOK_WHERE (and (and (and (and (> (. (TOK_TABLE_OR_COL src1) c1) '0') (or (<> (. (TOK_TABLE_OR_COL src1) c2) 'val_500') (> (. (TOK_TABLE_OR_COL src1) c1) '1'))) (or (> (. (TOK_TABLE_OR_COL src2) c3) '10') (<> (. (TOK_TABLE_OR_COL src1) c1) '10'))) (<> (. (TOK_TABLE_OR_COL src2) c3) '4')) (<> (. (TOK_TABLE_OR_COL src3) c5) '1'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -32,99 +29,64 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((((key <> '11') and (key < '400')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) - type: boolean + predicate: (((((key <> '12') and (key < '400')) and (key <> '4')) and (key > '0')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < '400') - type: boolean + predicate: (_col0 < '400') (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - src2:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((key <> '12') and (key < '400')) and (key <> '4')) and (key > '0')) and (key <> '1')) - type: boolean + predicate: ((((((key <> '11') and (key < '400')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < '400') - type: boolean + predicate: (_col0 < '400') (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - src3:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((key <> '13') and (key < '400')) and (key <> '1')) and (key > '0')) and (key <> '4')) - type: boolean + predicate: (((((key <> '13') and (key < '400')) and (key <> '1')) and (key > '0')) and (key <> '4')) (type: boolean) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < '400') - type: boolean + predicate: (_col0 < '400') (type: boolean) + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 2 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -134,22 +96,18 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4 Data size: 440 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((_col0 > '0') and ((_col1 <> 'val_500') or (_col0 > '1'))) and ((_col2 > '10') or (_col0 <> '10'))) and (_col2 <> '4')) and (_col4 <> '1')) - type: boolean + predicate: (((((_col0 > '0') and ((_col1 <> 'val_500') or (_col0 > '1'))) and ((_col2 > '10') or (_col0 <> '10'))) and (_col2 <> '4')) and (_col4 <> '1')) (type: boolean) + Statistics: Num rows: 1 Data size: 110 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 110 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 110 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1784,9 +1742,6 @@ JOIN ON src1.c1 = src3.c5 WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1') POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c2)) (TOK_WHERE (<> (. (TOK_TABLE_OR_COL src) key) '11')))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c4)) (TOK_WHERE (<> (. (TOK_TABLE_OR_COL src) key) '12')))) src2) (AND (= (. (TOK_TABLE_OR_COL src1) c1) (. (TOK_TABLE_OR_COL src2) c3)) (< (. (TOK_TABLE_OR_COL src1) c1) '400'))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c5) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c6)) (TOK_WHERE (<> (. (TOK_TABLE_OR_COL src) key) '13')))) src3) (= (. (TOK_TABLE_OR_COL src1) c1) (. (TOK_TABLE_OR_COL src3) c5)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) c1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) c4))) (TOK_WHERE (and (and (and (and (> (. (TOK_TABLE_OR_COL src1) c1) '0') (or (<> (. (TOK_TABLE_OR_COL src1) c2) 'val_500') (> (. (TOK_TABLE_OR_COL src1) c1) '1'))) (or (> (. (TOK_TABLE_OR_COL src2) c3) '10') (<> (. (TOK_TABLE_OR_COL src1) c1) '10'))) (<> (. (TOK_TABLE_OR_COL src2) c3) '4')) (<> (. (TOK_TABLE_OR_COL src3) c5) '1'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1794,87 +1749,55 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((((key <> '11') and (key < '400')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) - type: boolean + predicate: (((((key <> '12') and (key < '400')) and (key <> '4')) and (key > '0')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - src2:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((key <> '12') and (key < '400')) and (key <> '4')) and (key > '0')) and (key <> '1')) - type: boolean + predicate: ((((((key <> '11') and (key < '400')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - src3:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((key <> '13') and (key < '400')) and (key <> '1')) and (key > '0')) and (key <> '4')) - type: boolean + predicate: (((((key <> '13') and (key < '400')) and (key <> '1')) and (key > '0')) and (key <> '4')) (type: boolean) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 2 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1884,22 +1807,18 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13 Data size: 1322 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((_col0 > '0') and ((_col1 <> 'val_500') or (_col0 > '1'))) and ((_col2 > '10') or (_col0 <> '10'))) and (_col2 <> '4')) and (_col4 <> '1')) - type: boolean + predicate: (((((_col0 > '0') and ((_col1 <> 'val_500') or (_col0 > '1'))) and ((_col2 > '10') or (_col0 <> '10'))) and (_col2 <> '4')) and (_col4 <> '1')) (type: boolean) + Statistics: Num rows: 6 Data size: 610 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 610 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 6 Data size: 610 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_join_filter.q.out ql/src/test/results/clientpositive/ppd_join_filter.q.out index da2bed0..09ac35d 100644 --- ql/src/test/results/clientpositive/ppd_join_filter.q.out +++ ql/src/test/results/clientpositive/ppd_join_filter.q.out @@ -25,7 +25,99 @@ group by key on a.key=b.key and b.k1 < 5 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) k) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 1) k1) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 2) k2) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 3) k3)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (< (. (TOK_TABLE_OR_COL b) k1) 5)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k3))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + k + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 1 + k1 + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 2 + k2 + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 3 + k3 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + < + . + TOK_TABLE_OR_COL + b + k1 + 5 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + k2 + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + k3 + STAGE DEPENDENCIES: Stage-2 is a root stage @@ -35,45 +127,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - b:src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: min(key) - bucketGroup: false - keys: - expr: key - type: string + aggregations: min(key) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col1 - type: string + value expressions: _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -124,36 +199,19 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: min(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 2906 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: (_col1 + 1) - type: double - expr: (_col1 + 2) - type: double - expr: (_col1 + 3) - type: double + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: - numRows: 29 dataSize: 2906 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: (_col2 < 5.0) - type: boolean - Statistics: - numRows: 9 dataSize: 901 basicStatsState: COMPLETE colStatsState: NONE + predicate: (_col2 < 5.0) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -174,46 +232,27 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 9 dataSize: 901 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: _col3 - type: double - expr: _col4 - type: double - a + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string) TableScan - alias: a - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -290,28 +329,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col3} {VALUE._col4} - handleSkewJoin: false outputColumnNames: _col0, _col7, _col8 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col7 - type: double - expr: _col8 - type: double + expressions: _col0 (type: string), _col7 (type: double), _col8 (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -393,7 +422,99 @@ group by key on a.key=b.key and b.k1 < 5 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) k) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 1) k1) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 2) k2) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 3) k3)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (< (. (TOK_TABLE_OR_COL b) k1) 5)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k3))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + k + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 1 + k1 + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 2 + k2 + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 3 + k3 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + < + . + TOK_TABLE_OR_COL + b + k1 + 5 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + k2 + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + k3 + STAGE DEPENDENCIES: Stage-2 is a root stage @@ -403,45 +524,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - b:src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: min(key) - bucketGroup: false - keys: - expr: key - type: string + aggregations: min(key) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col1 - type: string + value expressions: _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -492,36 +596,19 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: min(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 2906 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: (_col1 + 1) - type: double - expr: (_col1 + 2) - type: double - expr: (_col1 + 3) - type: double + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: - numRows: 29 dataSize: 2906 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: (_col2 < 5.0) - type: boolean - Statistics: - numRows: 9 dataSize: 901 basicStatsState: COMPLETE colStatsState: NONE + predicate: (_col2 < 5.0) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -542,46 +629,27 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 9 dataSize: 901 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: _col3 - type: double - expr: _col4 - type: double - a + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string) TableScan - alias: a - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -658,28 +726,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col3} {VALUE._col4} - handleSkewJoin: false outputColumnNames: _col0, _col7, _col8 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col7 - type: double - expr: _col8 - type: double + expressions: _col0 (type: string), _col7 (type: double), _col8 (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -761,7 +819,99 @@ group by key on a.key=b.key and b.k1 < 5 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) k) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 1) k1) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 2) k2) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 3) k3)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (< (. (TOK_TABLE_OR_COL b) k1) 5)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k3))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + k + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 1 + k1 + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 2 + k2 + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 3 + k3 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + < + . + TOK_TABLE_OR_COL + b + k1 + 5 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + k2 + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + k3 + STAGE DEPENDENCIES: Stage-2 is a root stage @@ -771,45 +921,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - b:src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: min(key) - bucketGroup: false - keys: - expr: key - type: string + aggregations: min(key) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col1 - type: string + value expressions: _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -860,36 +993,19 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: min(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 2906 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: (_col1 + 1) - type: double - expr: (_col1 + 2) - type: double - expr: (_col1 + 3) - type: double + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: - numRows: 29 dataSize: 2906 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: (_col2 < 5.0) - type: boolean - Statistics: - numRows: 9 dataSize: 901 basicStatsState: COMPLETE colStatsState: NONE + predicate: (_col2 < 5.0) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -910,46 +1026,27 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 9 dataSize: 901 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: _col3 - type: double - expr: _col4 - type: double - a + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string) TableScan - alias: a - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1026,28 +1123,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col3} {VALUE._col4} - handleSkewJoin: false outputColumnNames: _col0, _col7, _col8 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col7 - type: double - expr: _col8 - type: double + expressions: _col0 (type: string), _col7 (type: double), _col8 (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1129,7 +1216,99 @@ group by key on a.key=b.key and b.k1 < 5 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) k) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 1) k1) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 2) k2) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 3) k3)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (< (. (TOK_TABLE_OR_COL b) k1) 5)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k3))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + k + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 1 + k1 + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 2 + k2 + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 3 + k3 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + < + . + TOK_TABLE_OR_COL + b + k1 + 5 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + k2 + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + k3 + STAGE DEPENDENCIES: Stage-2 is a root stage @@ -1139,45 +1318,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - b:src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: min(key) - bucketGroup: false - keys: - expr: key - type: string + aggregations: min(key) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col1 - type: string + value expressions: _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1228,36 +1390,19 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: min(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 2906 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: (_col1 + 1) - type: double - expr: (_col1 + 2) - type: double - expr: (_col1 + 3) - type: double + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: - numRows: 29 dataSize: 2906 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: (_col2 < 5.0) - type: boolean - Statistics: - numRows: 9 dataSize: 901 basicStatsState: COMPLETE colStatsState: NONE + predicate: (_col2 < 5.0) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -1278,46 +1423,27 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 9 dataSize: 901 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: _col3 - type: double - expr: _col4 - type: double - a + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string) TableScan - alias: a - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1394,28 +1520,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col3} {VALUE._col4} - handleSkewJoin: false outputColumnNames: _col0, _col7, _col8 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col7 - type: double - expr: _col8 - type: double + expressions: _col0 (type: string), _col7 (type: double), _col8 (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/ppd_multi_insert.q.out ql/src/test/results/clientpositive/ppd_multi_insert.q.out index 83d844e..2e6f877 100644 --- ql/src/test/results/clientpositive/ppd_multi_insert.q.out +++ ql/src/test/results/clientpositive/ppd_multi_insert.q.out @@ -27,9 +27,6 @@ INSERT OVERWRITE TABLE mi2 SELECT a.key, a.value WHERE a.key >= 100 and a.key < INSERT OVERWRITE TABLE mi3 PARTITION(ds='2008-04-08', hr='12') SELECT a.key WHERE a.key >= 200 and a.key < 300 INSERT OVERWRITE DIRECTORY 'target/warehouse/mi4.out' SELECT a.value WHERE a.key >= 300 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME mi1))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL a) key) 100))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME mi2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (and (>= (. (TOK_TABLE_OR_COL a) key) 100) (< (. (TOK_TABLE_OR_COL a) key) 200)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME mi3) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr '12')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (>= (. (TOK_TABLE_OR_COL a) key) 200) (< (. (TOK_TABLE_OR_COL a) key) 300)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR 'target/warehouse/mi4.out')) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (>= (. (TOK_TABLE_OR_COL a) key) 300)))) - STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 @@ -43,36 +40,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -80,75 +65,63 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 100) - type: boolean + predicate: (_col0 < 100) (type: boolean) + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 Filter Operator - predicate: - expr: ((_col0 >= 100) and (_col0 < 200)) - type: boolean + predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 Filter Operator - predicate: - expr: ((_col0 >= 200) and (_col0 < 300)) - type: boolean + predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int + expressions: UDFToInteger(_col0) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 Filter Operator - predicate: - expr: (_col0 >= 300) - type: boolean + predicate: (_col0 >= 300) (type: boolean) + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 4 + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1317,9 +1290,6 @@ POSTHOOK: Lineage: mi1.value SIMPLE [(src)a.FieldSchema(name:value, type:string, POSTHOOK: Lineage: mi2.key EXPRESSION [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: mi2.value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: mi3 PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(src)a.FieldSchema(name:key, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME mi1))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL a) key) 100))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME mi2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (and (>= (. (TOK_TABLE_OR_COL a) key) 100) (< (. (TOK_TABLE_OR_COL a) key) 200)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME mi3) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr '12')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (>= (. (TOK_TABLE_OR_COL a) key) 200) (< (. (TOK_TABLE_OR_COL a) key) 300)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR 'target/warehouse/mi4.out')) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (>= (. (TOK_TABLE_OR_COL a) key) 300)))) - STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 @@ -1333,36 +1303,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1370,75 +1328,63 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 100) - type: boolean + predicate: (_col0 < 100) (type: boolean) + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 Filter Operator - predicate: - expr: ((_col0 >= 100) and (_col0 < 200)) - type: boolean + predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 Filter Operator - predicate: - expr: ((_col0 >= 200) and (_col0 < 300)) - type: boolean + predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int + expressions: UDFToInteger(_col0) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 3 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 Filter Operator - predicate: - expr: (_col0 >= 300) - type: boolean + predicate: (_col0 >= 300) (type: boolean) + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 4 + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_outer_join1.q.out ql/src/test/results/clientpositive/ppd_outer_join1.q.out index ac46175..183babe 100644 --- ql/src/test/results/clientpositive/ppd_outer_join1.q.out +++ ql/src/test/results/clientpositive/ppd_outer_join1.q.out @@ -16,9 +16,6 @@ POSTHOOK: query: EXPLAIN SELECT a.key, a.value, b.key, b.value WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 10) (< (. (TOK_TABLE_OR_COL a) key) 20)) (> (. (TOK_TABLE_OR_COL b) key) 15)) (< (. (TOK_TABLE_OR_COL b) key) 25))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -26,49 +23,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -76,26 +55,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((_col0 > 10) and (_col0 < 20)) and (_col4 > 15)) and (_col4 < 25)) - type: boolean + predicate: ((((_col0 > 10) and (_col0 < 20)) and (_col4 > 15)) and (_col4 < 25)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -149,9 +120,6 @@ POSTHOOK: query: EXPLAIN SELECT a.key, a.value, b.key, b.value WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 10) (< (. (TOK_TABLE_OR_COL a) key) 20)) (> (. (TOK_TABLE_OR_COL b) key) 15)) (< (. (TOK_TABLE_OR_COL b) key) 25))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -159,49 +127,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -209,26 +159,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((_col4 > 15) and (_col4 < 25)) - type: boolean + predicate: ((_col4 > 15) and (_col4 < 25)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_outer_join2.q.out ql/src/test/results/clientpositive/ppd_outer_join2.q.out index 8158aa1..daa040a 100644 --- ql/src/test/results/clientpositive/ppd_outer_join2.q.out +++ ql/src/test/results/clientpositive/ppd_outer_join2.q.out @@ -16,9 +16,6 @@ POSTHOOK: query: EXPLAIN SELECT a.key, a.value, b.key, b.value WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) '10') (< (. (TOK_TABLE_OR_COL a) key) '20')) (> (. (TOK_TABLE_OR_COL b) key) '15')) (< (. (TOK_TABLE_OR_COL b) key) '25'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -26,49 +23,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > '15') and (key < '25')) - type: boolean + predicate: ((key > '15') and (key < '25')) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > '15') and (key < '25')) - type: boolean + predicate: ((key > '15') and (key < '25')) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -76,26 +55,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((_col0 > '10') and (_col0 < '20')) and (_col4 > '15')) and (_col4 < '25')) - type: boolean + predicate: ((((_col0 > '10') and (_col0 < '20')) and (_col4 > '15')) and (_col4 < '25')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -269,9 +240,6 @@ POSTHOOK: query: EXPLAIN SELECT a.key, a.value, b.key, b.value WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) '10') (< (. (TOK_TABLE_OR_COL a) key) '20')) (> (. (TOK_TABLE_OR_COL b) key) '15')) (< (. (TOK_TABLE_OR_COL b) key) '25'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -279,49 +247,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > '15') and (key < '25')) - type: boolean + predicate: ((key > '15') and (key < '25')) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > '15') and (key < '25')) - type: boolean + predicate: ((key > '15') and (key < '25')) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -329,26 +279,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((_col0 > '10') and (_col0 < '20')) - type: boolean + predicate: ((_col0 > '10') and (_col0 < '20')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_outer_join3.q.out ql/src/test/results/clientpositive/ppd_outer_join3.q.out index 58b35ef..02d06d9 100644 --- ql/src/test/results/clientpositive/ppd_outer_join3.q.out +++ ql/src/test/results/clientpositive/ppd_outer_join3.q.out @@ -16,9 +16,6 @@ POSTHOOK: query: EXPLAIN SELECT a.key, a.value, b.key, b.value WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) '10') (< (. (TOK_TABLE_OR_COL a) key) '20')) (> (. (TOK_TABLE_OR_COL b) key) '15')) (< (. (TOK_TABLE_OR_COL b) key) '25'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -26,41 +23,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -68,26 +49,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((_col0 > '10') and (_col0 < '20')) and (_col4 > '15')) and (_col4 < '25')) - type: boolean + predicate: ((((_col0 > '10') and (_col0 < '20')) and (_col4 > '15')) and (_col4 < '25')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -261,9 +234,6 @@ POSTHOOK: query: EXPLAIN SELECT a.key, a.value, b.key, b.value WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) '10') (< (. (TOK_TABLE_OR_COL a) key) '20')) (> (. (TOK_TABLE_OR_COL b) key) '15')) (< (. (TOK_TABLE_OR_COL b) key) '25'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -271,41 +241,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -313,26 +267,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((_col4 > '15') and (_col4 < '25')) and (_col0 > '10')) and (_col0 < '20')) - type: boolean + predicate: ((((_col4 > '15') and (_col4 < '25')) and (_col0 > '10')) and (_col0 < '20')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_outer_join4.q.out ql/src/test/results/clientpositive/ppd_outer_join4.q.out index 51c1310..aba0b7e 100644 --- ql/src/test/results/clientpositive/ppd_outer_join4.q.out +++ ql/src/test/results/clientpositive/ppd_outer_join4.q.out @@ -22,9 +22,6 @@ POSTHOOK: query: EXPLAIN SELECT a.key, a.value, b.key, b.value, c.key WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME src) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) key))) (TOK_WHERE (AND (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) '10') (< (. (TOK_TABLE_OR_COL a) key) '20')) (> (. (TOK_TABLE_OR_COL b) key) '15')) (< (. (TOK_TABLE_OR_COL b) key) '25')) (<> (TOK_FUNCTION sqrt (. (TOK_TABLE_OR_COL c) key)) 13))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -32,68 +29,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (sqrt(key) <> 13) - type: boolean + predicate: (sqrt(key) <> 13) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: c + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (sqrt(key) <> 13) - type: boolean + predicate: (sqrt(key) <> 13) (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - c + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: c + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (sqrt(key) <> 13) - type: boolean + predicate: (sqrt(key) <> 13) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -103,28 +75,18 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((_col0 > '10') and (_col0 < '20')) and (_col4 > '15')) and (_col4 < '25')) and (sqrt(_col8) <> 13)) - type: boolean + predicate: (((((_col0 > '10') and (_col0 < '20')) and (_col4 > '15')) and (_col4 < '25')) and (sqrt(_col8) <> 13)) (type: boolean) + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -414,9 +376,6 @@ POSTHOOK: query: EXPLAIN SELECT a.key, a.value, b.key, b.value, c.key WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME src) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) key))) (TOK_WHERE (AND (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) '10') (< (. (TOK_TABLE_OR_COL a) key) '20')) (> (. (TOK_TABLE_OR_COL b) key) '15')) (< (. (TOK_TABLE_OR_COL b) key) '25')) (<> (TOK_FUNCTION sqrt (. (TOK_TABLE_OR_COL c) key)) 13))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -424,68 +383,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (sqrt(key) <> 13) - type: boolean + predicate: (sqrt(key) <> 13) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b + alias: c + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (sqrt(key) <> 13) - type: boolean + predicate: (sqrt(key) <> 13) (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - c + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: c + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (sqrt(key) <> 13) - type: boolean + predicate: (sqrt(key) <> 13) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -495,28 +429,18 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((_col4 > '15') and (_col4 < '25')) and (_col0 > '10')) and (_col0 < '20')) - type: boolean + predicate: ((((_col4 > '15') and (_col4 < '25')) and (_col0 > '10')) and (_col0 < '20')) (type: boolean) + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_outer_join5.q.out ql/src/test/results/clientpositive/ppd_outer_join5.q.out index 53e02b2..cdad2af 100644 --- ql/src/test/results/clientpositive/ppd_outer_join5.q.out +++ ql/src/test/results/clientpositive/ppd_outer_join5.q.out @@ -22,9 +22,6 @@ PREHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME t1)) (TOK_TABREF (TOK_TABNAME t2)) (= (. (TOK_TABLE_OR_COL t1) id) (. (TOK_TABLE_OR_COL t2) id))) (TOK_TABREF (TOK_TABNAME t3)) (= (. (TOK_TABLE_OR_COL t2) id) (. (TOK_TABLE_OR_COL t3) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL t3) id) 20)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -32,76 +29,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan - alias: t1 + alias: t3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (id = 20) - type: boolean + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: id - type: int + key expressions: id (type: int) sort order: + - Map-reduce partition columns: - expr: id - type: int - tag: 0 - value expressions: - expr: id - type: int - expr: key - type: string - expr: value - type: string - t2 + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: id (type: int), key (type: string), value (type: string) TableScan alias: t2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (id = 20) - type: boolean + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: id - type: int + key expressions: id (type: int) sort order: + - Map-reduce partition columns: - expr: id - type: int - tag: 1 - value expressions: - expr: id - type: int - expr: key - type: string - expr: value - type: string - t3 + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: id (type: int), key (type: string), value (type: string) TableScan - alias: t3 + alias: t1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (id = 20) - type: boolean + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: id - type: int + key expressions: id (type: int) sort order: + - Map-reduce partition columns: - expr: id - type: int - tag: 2 - value expressions: - expr: id - type: int - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: id (type: int), key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -111,32 +75,15 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7, _col10, _col11, _col12 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: int - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col10 - type: int - expr: _col11 - type: string - expr: _col12 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: string), _col10 (type: int), _col11 (type: string), _col12 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -150,9 +97,6 @@ PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer joi PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME t1)) (TOK_TABREF (TOK_TABNAME t2)) (= (. (TOK_TABLE_OR_COL t1) id) (. (TOK_TABLE_OR_COL t2) id))) (TOK_TABREF (TOK_TABNAME t3)) (= (. (TOK_TABLE_OR_COL t2) id) (. (TOK_TABLE_OR_COL t3) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL t2) id) 20)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -160,76 +104,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan - alias: t1 + alias: t3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (id = 20) - type: boolean + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: id - type: int + key expressions: id (type: int) sort order: + - Map-reduce partition columns: - expr: id - type: int - tag: 0 - value expressions: - expr: id - type: int - expr: key - type: string - expr: value - type: string - t2 + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: id (type: int), key (type: string), value (type: string) TableScan alias: t2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (id = 20) - type: boolean + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: id - type: int + key expressions: id (type: int) sort order: + - Map-reduce partition columns: - expr: id - type: int - tag: 1 - value expressions: - expr: id - type: int - expr: key - type: string - expr: value - type: string - t3 + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: id (type: int), key (type: string), value (type: string) TableScan - alias: t3 + alias: t1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (id = 20) - type: boolean + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: id - type: int + key expressions: id (type: int) sort order: + - Map-reduce partition columns: - expr: id - type: int - tag: 2 - value expressions: - expr: id - type: int - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: id (type: int), key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -239,32 +150,15 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7, _col10, _col11, _col12 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: int - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col10 - type: int - expr: _col11 - type: string - expr: _col12 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: string), _col10 (type: int), _col11 (type: string), _col12 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -278,9 +172,6 @@ PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer joi PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME t1)) (TOK_TABREF (TOK_TABNAME t2)) (= (. (TOK_TABLE_OR_COL t1) id) (. (TOK_TABLE_OR_COL t2) id))) (TOK_TABREF (TOK_TABNAME t3)) (= (. (TOK_TABLE_OR_COL t1) id) (. (TOK_TABLE_OR_COL t3) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL t2) id) 20)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -288,76 +179,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan - alias: t1 + alias: t3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (id = 20) - type: boolean + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: id - type: int + key expressions: id (type: int) sort order: + - Map-reduce partition columns: - expr: id - type: int - tag: 0 - value expressions: - expr: id - type: int - expr: key - type: string - expr: value - type: string - t2 + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: id (type: int), key (type: string), value (type: string) TableScan alias: t2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (id = 20) - type: boolean + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: id - type: int + key expressions: id (type: int) sort order: + - Map-reduce partition columns: - expr: id - type: int - tag: 1 - value expressions: - expr: id - type: int - expr: key - type: string - expr: value - type: string - t3 + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: id (type: int), key (type: string), value (type: string) TableScan - alias: t3 + alias: t1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (id = 20) - type: boolean + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: id - type: int + key expressions: id (type: int) sort order: + - Map-reduce partition columns: - expr: id - type: int - tag: 2 - value expressions: - expr: id - type: int - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: id (type: int), key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -367,32 +225,15 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7, _col10, _col11, _col12 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: int - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col10 - type: int - expr: _col11 - type: string - expr: _col12 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: string), _col10 (type: int), _col11 (type: string), _col12 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_random.q.out ql/src/test/results/clientpositive/ppd_random.q.out index bf28433..deff483 100644 --- ql/src/test/results/clientpositive/ppd_random.q.out +++ ql/src/test/results/clientpositive/ppd_random.q.out @@ -16,9 +16,6 @@ JOIN ON src1.c1 = src2.c3 WHERE rand() > 0.5 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c2)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c4)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) '2')))) src2) (= (. (TOK_TABLE_OR_COL src1) c1) (. (TOK_TABLE_OR_COL src2) c3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) c1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) c4))) (TOK_WHERE (> (TOK_FUNCTION rand) 0.5)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -26,53 +23,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:src - TableScan - alias: src - Select Operator - expressions: - expr: key - type: string - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - src2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '2') - type: boolean + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -80,22 +60,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col3 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (rand() > 0.5) - type: boolean + predicate: (rand() > 0.5) (type: boolean) + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -123,9 +99,6 @@ JOIN ON src1.c1 = src2.c3 WHERE rand() > 0.5 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c2)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c4)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) '2')))) src2) (= (. (TOK_TABLE_OR_COL src1) c1) (. (TOK_TABLE_OR_COL src2) c3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) c1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) c4))) (TOK_WHERE (> (TOK_FUNCTION rand) 0.5)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -133,53 +106,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:src - TableScan - alias: src - Select Operator - expressions: - expr: key - type: string - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - src2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '2') - type: boolean + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -187,22 +143,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col3 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (rand() > 0.5) - type: boolean + predicate: (rand() > 0.5) (type: boolean) + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 21 Data size: 2131 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_repeated_alias.q.out ql/src/test/results/clientpositive/ppd_repeated_alias.q.out index 084328b..e96130e 100644 --- ql/src/test/results/clientpositive/ppd_repeated_alias.q.out +++ ql/src/test/results/clientpositive/ppd_repeated_alias.q.out @@ -30,9 +30,6 @@ FROM pokes a LEFT OUTER JOIN pokes2 b ON a.foo=b.foo WHERE b.bar=3 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME pokes) a) (TOK_TABREF (TOK_TABNAME pokes2) b) (= (. (TOK_TABLE_OR_COL a) foo) (. (TOK_TABLE_OR_COL b) foo)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) foo) foo1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) foo) foo2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) bar))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) bar) 3)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -40,39 +37,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: foo - type: int + key expressions: foo (type: int) sort order: + - Map-reduce partition columns: - expr: foo - type: int - tag: 0 - value expressions: - expr: foo - type: int - b + Map-reduce partition columns: foo (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: foo (type: int), bar (type: int) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: foo - type: int + key expressions: foo (type: int) sort order: + - Map-reduce partition columns: - expr: foo - type: int - tag: 1 - value expressions: - expr: foo - type: int - expr: bar - type: int + Map-reduce partition columns: foo (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: foo (type: int) Reduce Operator Tree: Join Operator condition map: @@ -80,24 +63,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (_col6 = 3) - type: boolean + predicate: (_col6 = 3) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col5 - type: int - expr: _col6 - type: int + expressions: _col0 (type: int), _col5 (type: int), _col6 (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -123,9 +100,6 @@ SELECT * FROM ON a.foo=b.foo) a WHERE a.bar=3 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME pokes) a) (TOK_TABREF (TOK_TABNAME pokes2) b) (= (. (TOK_TABLE_OR_COL a) foo) (. (TOK_TABLE_OR_COL b) foo)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) foo) foo1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) foo) foo2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) bar))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) bar) 3)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -133,39 +107,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: foo - type: int + key expressions: foo (type: int) sort order: + - Map-reduce partition columns: - expr: foo - type: int - tag: 0 - value expressions: - expr: foo - type: int - a:b + Map-reduce partition columns: foo (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: foo (type: int), bar (type: int) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: foo - type: int + key expressions: foo (type: int) sort order: + - Map-reduce partition columns: - expr: foo - type: int - tag: 1 - value expressions: - expr: foo - type: int - expr: bar - type: int + Map-reduce partition columns: foo (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: foo (type: int) Reduce Operator Tree: Join Operator condition map: @@ -173,24 +133,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (_col6 = 3) - type: boolean + predicate: (_col6 = 3) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col5 - type: int - expr: _col6 - type: int + expressions: _col0 (type: int), _col5 (type: int), _col6 (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -216,9 +170,6 @@ SELECT * FROM ON a.foo=b.foo) a WHERE a.bar=3 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME pokes) a) (TOK_TABREF (TOK_TABNAME pokes2) b) (= (. (TOK_TABLE_OR_COL a) foo) (. (TOK_TABLE_OR_COL b) foo)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) foo) foo1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) foo) foo2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) bar))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) bar) 3)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -226,43 +177,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:a + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: foo (type: int) + sort order: + + Map-reduce partition columns: foo (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: foo (type: int) TableScan alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (bar = 3) - type: boolean + predicate: (bar = 3) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: foo - type: int + key expressions: foo (type: int) sort order: + - Map-reduce partition columns: - expr: foo - type: int - tag: 0 - value expressions: - expr: foo - type: int - expr: bar - type: int - a:b - TableScan - alias: b - Reduce Output Operator - key expressions: - expr: foo - type: int - sort order: + - Map-reduce partition columns: - expr: foo - type: int - tag: 1 - value expressions: - expr: foo - type: int + Map-reduce partition columns: foo (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: foo (type: int), bar (type: int) Reduce Operator Tree: Join Operator condition map: @@ -270,20 +206,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col5 - type: int - expr: _col1 - type: int + expressions: _col0 (type: int), _col5 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -299,9 +230,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- Q4: here, the filter c.bar should be created under the first join but above the second explain select c.foo, d.bar from (select c.foo, b.bar, c.blah from pokes c left outer join pokes b on c.foo=b.foo) c left outer join pokes d where d.foo=1 and c.bar=2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME pokes) c) (TOK_TABREF (TOK_TABNAME pokes) b) (= (. (TOK_TABLE_OR_COL c) foo) (. (TOK_TABLE_OR_COL b) foo)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) foo)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) bar)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) blah))))) c) (TOK_TABREF (TOK_TABNAME pokes) d))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) foo)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL d) bar))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL d) foo) 1) (= (. (TOK_TABLE_OR_COL c) bar) 2))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -310,37 +238,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - c:b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: foo - type: int + key expressions: foo (type: int) sort order: + - Map-reduce partition columns: - expr: foo - type: int - tag: 1 - value expressions: - expr: bar - type: int - c:c + Map-reduce partition columns: foo (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: bar (type: int) TableScan alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: foo - type: int + key expressions: foo (type: int) sort order: + - Map-reduce partition columns: - expr: foo - type: int - tag: 0 - value expressions: - expr: foo - type: int + Map-reduce partition columns: foo (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: foo (type: int) Reduce Operator Tree: Join Operator condition map: @@ -348,20 +264,17 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (_col6 = 2) - type: boolean + predicate: (_col6 = 2) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -369,26 +282,19 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator sort order: - tag: 0 - value expressions: - expr: _col0 - type: int - d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: foo (type: int), bar (type: int) TableScan - alias: d Reduce Output Operator sort order: - tag: 1 - value expressions: - expr: foo - type: int - expr: bar - type: int + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -396,22 +302,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (_col3 = 1) - type: boolean + predicate: (_col3 = 1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col4 - type: int + expressions: _col0 (type: int), _col4 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_transform.q.out ql/src/test/results/clientpositive/ppd_transform.q.out index 5668535..e7c07ed 100644 --- ql/src/test/results/clientpositive/ppd_transform.q.out +++ ql/src/test/results/clientpositive/ppd_transform.q.out @@ -16,9 +16,6 @@ FROM ( ) tmap SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST tkey tvalue)))) (TOK_CLUSTERBY (TOK_TABLE_OR_COL tkey)))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) tkey)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) tvalue))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL tmap) tkey) 100)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -26,53 +23,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 100) - type: boolean + predicate: (_col0 < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -204,9 +188,6 @@ FROM ( ) tmap SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST tkey tvalue)))) (TOK_CLUSTERBY (TOK_TABLE_OR_COL tkey)))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) tkey)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) tvalue))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL tmap) tkey) 100)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -214,53 +195,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 < 100) - type: boolean + predicate: (_col0 < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_udf_case.q.out ql/src/test/results/clientpositive/ppd_udf_case.q.out index 81cf617..5ecabfb 100644 --- ql/src/test/results/clientpositive/ppd_udf_case.q.out +++ ql/src/test/results/clientpositive/ppd_udf_case.q.out @@ -24,9 +24,6 @@ WHERE a.ds = '2008-04-08' AND END ORDER BY a.key, a.value, a.ds, a.hr, b.key, b.value, b.ds, b.hr POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (AND (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08') (= (. (TOK_TABLE_OR_COL b) ds) '2008-04-08')) (TOK_FUNCTION CASE (. (TOK_TABLE_OR_COL a) key) '27' TRUE '38' FALSE TOK_NULL))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) ds)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) hr)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL b) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL b) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL b) ds)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL b) hr))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -35,57 +32,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: CASE (key) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END - type: boolean + predicate: CASE (key) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: CASE (key) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END - type: boolean + predicate: CASE (key) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) Reduce Operator Tree: Join Operator condition map: @@ -93,34 +64,17 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col6, _col7, _col8, _col9 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((_col2 = '2008-04-08') and (_col8 = '2008-04-08')) and CASE (_col0) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END) - type: boolean + predicate: (((_col2 = '2008-04-08') and (_col8 = '2008-04-08')) and CASE (_col0) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END) (type: boolean) + Statistics: Num rows: 3 Data size: 618 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 3 Data size: 618 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -128,51 +82,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - expr: _col7 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) sort order: ++++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - expr: _col7 - type: string + Statistics: Num rows: 3 Data size: 618 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 618 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 618 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -244,9 +166,6 @@ WHERE a.ds = '2008-04-08' AND END ORDER BY a.key, a.value, a.ds, a.hr, b.key, b.value, b.ds, b.hr POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (AND (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08') (= (. (TOK_TABLE_OR_COL b) ds) '2008-04-08')) (TOK_FUNCTION CASE (. (TOK_TABLE_OR_COL a) key) '27' TRUE '38' FALSE TOK_NULL))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) ds)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) hr)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL b) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL b) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL b) ds)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL b) hr))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -255,57 +174,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: CASE (key) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END - type: boolean + predicate: CASE (key) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: CASE (key) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END - type: boolean + predicate: CASE (key) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) Reduce Operator Tree: Join Operator condition map: @@ -313,30 +206,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col6, _col7, _col8, _col9 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -344,51 +221,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - expr: _col7 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) sort order: ++++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - expr: _col7 - type: string + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_udf_col.q.out ql/src/test/results/clientpositive/ppd_udf_col.q.out index 1a8bf07..bc42d8d 100644 --- ql/src/test/results/clientpositive/ppd_udf_col.q.out +++ ql/src/test/results/clientpositive/ppd_udf_col.q.out @@ -8,9 +8,6 @@ SELECT key, randum123 FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a WHERE randum123 <=0.1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_FUNCTION rand)) randum123)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 100)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL randum123))) (TOK_WHERE (<= (TOK_TABLE_OR_COL randum123) 0.1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -18,35 +15,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 100) - type: boolean + predicate: (key = 100) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: rand() - type: double + expressions: key (type: string), rand() (type: double) outputColumnNames: _col0, _col2 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col2 <= 0.1) - type: boolean + predicate: (_col2 <= 0.1) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double + expressions: _col0 (type: string), _col2 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -70,9 +59,6 @@ SELECT key, randum123 FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a WHERE randum123 <=0.1)s WHERE s.randum123>0.1 LIMIT 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_FUNCTION rand)) randum123)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 100)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL randum123))) (TOK_WHERE (<= (TOK_TABLE_OR_COL randum123) 0.1)))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL s) randum123) 0.1)) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -80,47 +66,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s:a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 100) - type: boolean + predicate: (key = 100) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: rand() - type: double + expressions: key (type: string), rand() (type: double) outputColumnNames: _col0, _col2 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col2 <= 0.1) - type: boolean + predicate: (_col2 <= 0.1) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double + expressions: _col0 (type: string), _col2 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col1 > 0.1) - type: boolean + predicate: (_col1 > 0.1) (type: boolean) + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -140,9 +116,6 @@ SELECT key,randum123, h4 FROM (SELECT *, cast(rand() as double) AS randum123, hex(4) AS h4 FROM src WHERE key = 100) a WHERE a.h4 <= 3 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_FUNCTION rand)) randum123) (TOK_SELEXPR (TOK_FUNCTION hex 4) h4)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 100)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL randum123)) (TOK_SELEXPR (TOK_TABLE_OR_COL h4))) (TOK_WHERE (<= (. (TOK_TABLE_OR_COL a) h4) 3)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -150,39 +123,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 100) - type: boolean + predicate: (key = 100) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: rand() - type: double - expr: hex(4) - type: string + expressions: key (type: string), rand() (type: double), hex(4) (type: string) outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col3 <= 3) - type: boolean + predicate: (_col3 <= 3) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double - expr: _col3 - type: string + expressions: _col0 (type: string), _col2 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -202,9 +163,6 @@ SELECT key,randum123, v10 FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a WHERE a.v10 <= 200 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_FUNCTION rand)) randum123) (TOK_SELEXPR (* (TOK_TABLE_OR_COL value) 10) v10)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 100)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL randum123)) (TOK_SELEXPR (TOK_TABLE_OR_COL v10))) (TOK_WHERE (<= (. (TOK_TABLE_OR_COL a) v10) 200)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -212,39 +170,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 100) - type: boolean + predicate: (key = 100) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: rand() - type: double - expr: (value * 10) - type: double + expressions: key (type: string), rand() (type: double), (value * 10) (type: double) outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col3 <= 200.0) - type: boolean + predicate: (_col3 <= 200.0) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double - expr: _col3 - type: double + expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -264,9 +210,6 @@ SELECT key, randum123 FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a WHERE randum123 <=0.1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_FUNCTION rand)) randum123)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 100)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL randum123))) (TOK_WHERE (<= (TOK_TABLE_OR_COL randum123) 0.1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -274,35 +217,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 100) - type: boolean + predicate: (key = 100) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: rand() - type: double + expressions: key (type: string), rand() (type: double) outputColumnNames: _col0, _col2 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col2 <= 0.1) - type: boolean + predicate: (_col2 <= 0.1) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double + expressions: _col0 (type: string), _col2 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -326,9 +261,6 @@ SELECT key, randum123 FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a WHERE randum123 <=0.1)s WHERE s.randum123>0.1 LIMIT 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_FUNCTION rand)) randum123)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 100)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL randum123))) (TOK_WHERE (<= (TOK_TABLE_OR_COL randum123) 0.1)))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL s) randum123) 0.1)) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -336,36 +268,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s:a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 100) - type: boolean + predicate: (key = 100) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: rand() - type: double + expressions: key (type: string), rand() (type: double) outputColumnNames: _col0, _col2 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((_col2 <= 0.1) and (_col2 > 0.1)) - type: boolean + predicate: ((_col2 <= 0.1) and (_col2 > 0.1)) (type: boolean) + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double + expressions: _col0 (type: string), _col2 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -385,9 +311,6 @@ SELECT key,randum123, h4 FROM (SELECT *, cast(rand() as double) AS randum123, hex(4) AS h4 FROM src WHERE key = 100) a WHERE a.h4 <= 3 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_FUNCTION rand)) randum123) (TOK_SELEXPR (TOK_FUNCTION hex 4) h4)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 100)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL randum123)) (TOK_SELEXPR (TOK_TABLE_OR_COL h4))) (TOK_WHERE (<= (. (TOK_TABLE_OR_COL a) h4) 3)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -395,39 +318,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 100) - type: boolean + predicate: (key = 100) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: rand() - type: double - expr: hex(4) - type: string + expressions: key (type: string), rand() (type: double), hex(4) (type: string) outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col3 <= 3) - type: boolean + predicate: (_col3 <= 3) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double - expr: _col3 - type: string + expressions: _col0 (type: string), _col2 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -447,9 +358,6 @@ SELECT key,randum123, v10 FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a WHERE a.v10 <= 200 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_FUNCTION rand)) randum123) (TOK_SELEXPR (* (TOK_TABLE_OR_COL value) 10) v10)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 100)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL randum123)) (TOK_SELEXPR (TOK_TABLE_OR_COL v10))) (TOK_WHERE (<= (. (TOK_TABLE_OR_COL a) v10) 200)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -457,39 +365,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 100) - type: boolean + predicate: (key = 100) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: rand() - type: double - expr: (value * 10) - type: double + expressions: key (type: string), rand() (type: double), (value * 10) (type: double) outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col3 <= 200.0) - type: boolean + predicate: (_col3 <= 200.0) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double - expr: _col3 - type: double + expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_union.q.out ql/src/test/results/clientpositive/ppd_union.q.out index 17c63ed..5b243a5 100644 --- ql/src/test/results/clientpositive/ppd_union.q.out +++ ql/src/test/results/clientpositive/ppd_union.q.out @@ -16,9 +16,6 @@ FROM ( SELECT unioned_query.* WHERE key > '4' and value > 'val_4' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL src) key) '100')))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) '150'))))) unioned_query)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME unioned_query)))) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) '4') (> (TOK_TABLE_OR_COL value) 'val_4'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -26,69 +23,55 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:unioned_query-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((key < '100') and (key > '4')) and (value > 'val_4')) - type: boolean + predicate: (((key < '100') and (key > '4')) and (value > 'val_4')) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((_col0 > '4') and (_col1 > 'val_4')) - type: boolean + predicate: ((_col0 > '4') and (_col1 > 'val_4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - null-subquery2:unioned_query-subquery2:src TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((key > '150') and (key > '4')) and (value > 'val_4')) - type: boolean + predicate: (((key > '150') and (key > '4')) and (value > 'val_4')) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((_col0 > '4') and (_col1 > 'val_4')) - type: boolean + predicate: ((_col0 > '4') and (_col1 > 'val_4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -307,9 +290,6 @@ FROM ( SELECT unioned_query.* WHERE key > '4' and value > 'val_4' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL src) key) '100')))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) '150'))))) unioned_query)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME unioned_query)))) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) '4') (> (TOK_TABLE_OR_COL value) 'val_4'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -317,61 +297,49 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:unioned_query-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((key < '100') and (key > '4')) and (value > 'val_4')) - type: boolean + predicate: (((key < '100') and (key > '4')) and (value > 'val_4')) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - null-subquery2:unioned_query-subquery2:src TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((key > '150') and (key > '4')) and (value > 'val_4')) - type: boolean + predicate: (((key > '150') and (key > '4')) and (value > 'val_4')) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/ppd_union_view.q.out ql/src/test/results/clientpositive/ppd_union_view.q.out index 3c86758..4d319d5 100644 --- ql/src/test/results/clientpositive/ppd_union_view.q.out +++ ql/src/test/results/clientpositive/ppd_union_view.q.out @@ -187,7 +187,25 @@ POSTHOOK: Lineage: t1_old PARTITION(ds=2011-10-13).value SIMPLE [] POSTHOOK: Lineage: t1_old PARTITION(ds=2011-10-14).keymap SIMPLE [] POSTHOOK: Lineage: t1_old PARTITION(ds=2011-10-14).value SIMPLE [] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2011-10-13')))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + TOK_TABLE_OR_COL + ds + '2011-10-13' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -197,57 +215,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1-subquery2:subq-subquery2:t1_mapping + Map Operator Tree: TableScan alias: t1_mapping - Statistics: - numRows: 1 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: keymap - type: string - expr: ds - type: string + key expressions: keymap (type: string), ds (type: string) sort order: ++ - Map-reduce partition columns: - expr: keymap - type: string - expr: ds - type: string - Statistics: - numRows: 1 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: keymap (type: string), ds (type: string) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: - expr: key - type: string - t1-subquery2:subq-subquery2:t1_old + value expressions: key (type: string) TableScan alias: t1_old - Statistics: - numRows: 1 dataSize: 14 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: keymap - type: string - expr: ds - type: string + key expressions: keymap (type: string), ds (type: string) sort order: ++ - Map-reduce partition columns: - expr: keymap - type: string - expr: ds - type: string - Statistics: - numRows: 1 dataSize: 14 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: keymap (type: string), ds (type: string) + Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: - expr: value - type: string - expr: ds - type: string + value expressions: value (type: string), ds (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -344,21 +334,12 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col1, _col2, _col5 - Statistics: - numRows: 1 dataSize: 15 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col5 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col5 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 1 dataSize: 15 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -379,31 +360,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Union - Statistics: - numRows: 1 dataSize: 15 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 1 dataSize: 15 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 15 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -419,66 +390,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - t1-subquery1:subq-subquery1:t1_new - TableScan - alias: t1_new - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (ds = '2011-10-13') - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Union - Statistics: - numRows: 1 dataSize: 15 basicStatsState: COMPLETE colStatsState: NONE - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 1 dataSize: 15 basicStatsState: COMPLETE colStatsState: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 15 basicStatsState: COMPLETE colStatsState: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -590,7 +501,25 @@ POSTHOOK: Lineage: t1_old PARTITION(ds=2011-10-13).value SIMPLE [] POSTHOOK: Lineage: t1_old PARTITION(ds=2011-10-14).keymap SIMPLE [] POSTHOOK: Lineage: t1_old PARTITION(ds=2011-10-14).value SIMPLE [] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2011-10-15')))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + TOK_TABLE_OR_COL + ds + '2011-10-15' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -600,71 +529,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1-subquery2:subq-subquery2:t1_mapping - TableScan - alias: t1_mapping - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (ds = '2011-10-15') - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Reduce Output Operator - key expressions: - expr: keymap - type: string - expr: ds - type: string - sort order: ++ - Map-reduce partition columns: - expr: keymap - type: string - expr: ds - type: string - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - tag: 1 - value expressions: - expr: key - type: string - t1-subquery2:subq-subquery2:t1_old - TableScan - alias: t1_old - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (ds = '2011-10-15') - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Reduce Output Operator - key expressions: - expr: keymap - type: string - expr: ds - type: string - sort order: ++ - Map-reduce partition columns: - expr: keymap - type: string - expr: ds - type: string - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - tag: 0 - value expressions: - expr: value - type: string - expr: ds - type: string Needs Tagging: true Reduce Operator Tree: Join Operator @@ -673,21 +537,12 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col1, _col2, _col5 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col5 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col5 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -708,31 +563,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Union - Statistics: - numRows: 1 dataSize: 11 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 1 dataSize: 11 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 11 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -748,44 +593,26 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - t1-subquery1:subq-subquery1:t1_new TableScan alias: t1_new - Statistics: - numRows: 1 dataSize: 11 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string + expressions: key (type: string), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 1 dataSize: 11 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Union - Statistics: - numRows: 1 dataSize: 11 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 1 dataSize: 11 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 11 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/ppd_vc.q.out ql/src/test/results/clientpositive/ppd_vc.q.out index 49cbe9f..3fd8be8 100644 --- ql/src/test/results/clientpositive/ppd_vc.q.out +++ ql/src/test/results/clientpositive/ppd_vc.q.out @@ -9,7 +9,25 @@ explain extended select * from srcpart where BLOCK__OFFSET__INSIDE__FILE<100 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE) 100)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + < + TOK_TABLE_OR_COL + BLOCK__OFFSET__INSIDE__FILE + 100 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -18,40 +36,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart - Statistics: - numRows: 116 dataSize: 23248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (BLOCK__OFFSET__INSIDE__FILE < 100) - type: boolean - Statistics: - numRows: 38 dataSize: 7615 basicStatsState: COMPLETE colStatsState: NONE + predicate: (BLOCK__OFFSET__INSIDE__FILE < 100) (type: boolean) + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 38 dataSize: 7615 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 38 dataSize: 7615 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -311,7 +314,72 @@ select b.* from src a join on a.key=b.key AND b.BLOCK__OFFSET__INSIDE__FILE<50 order by ds,hr,BLOCK__OFFSET__INSIDE__FILE POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (< (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE) 100)))) b) (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (< (. (TOK_TABLE_OR_COL b) BLOCK__OFFSET__INSIDE__FILE) 50)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL hr)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_SELEXPR + TOK_TABLE_OR_COL + BLOCK__OFFSET__INSIDE__FILE + TOK_WHERE + < + TOK_TABLE_OR_COL + BLOCK__OFFSET__INSIDE__FILE + 100 + b + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + < + . + TOK_TABLE_OR_COL + b + BLOCK__OFFSET__INSIDE__FILE + 50 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + b + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + hr + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + BLOCK__OFFSET__INSIDE__FILE + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -321,74 +389,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: 0 - b:srcpart TableScan alias: srcpart - Statistics: - numRows: 116 dataSize: 23248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((BLOCK__OFFSET__INSIDE__FILE < 100) and (BLOCK__OFFSET__INSIDE__FILE < 50)) - type: boolean - Statistics: - numRows: 12 dataSize: 2404 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((BLOCK__OFFSET__INSIDE__FILE < 100) and (BLOCK__OFFSET__INSIDE__FILE < 50)) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string - expr: BLOCK__OFFSET__INSIDE__FILE - type: bigint + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: - numRows: 12 dataSize: 2404 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 12 dataSize: 2404 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: bigint + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -616,25 +646,12 @@ STAGE PLANS: condition expressions: 0 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} - handleSkewJoin: false outputColumnNames: _col4, _col5, _col6, _col7, _col8 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col8 - type: bigint + expressions: _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -655,33 +672,15 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: bigint + key expressions: _col2 (type: string), _col3 (type: string), _col4 (type: bigint) sort order: +++ - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: bigint + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -710,15 +709,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/ppr_allchildsarenull.q.out ql/src/test/results/clientpositive/ppr_allchildsarenull.q.out index 2103e5a..3218d99 100644 --- ql/src/test/results/clientpositive/ppr_allchildsarenull.q.out +++ ql/src/test/results/clientpositive/ppr_allchildsarenull.q.out @@ -17,7 +17,48 @@ SELECT ELSE 0 end ) > 0 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL key)) user_id) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '2008-04-08') (> (TOK_FUNCTION WHEN (OR (LIKE (TOK_TABLE_OR_COL value) 'aaa%') (LIKE (TOK_TABLE_OR_COL value) 'vvv%')) 1 0) 0))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + TOK_INT + TOK_TABLE_OR_COL + key + user_id + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + and + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + > + TOK_FUNCTION + WHEN + OR + LIKE + TOK_TABLE_OR_COL + value + 'aaa%' + LIKE + TOK_TABLE_OR_COL + value + 'vvv%' + 1 + 0 + 0 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -26,36 +67,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (CASE WHEN (((value like 'aaa%') or (value like 'vvv%'))) THEN (1) ELSE (0) END > 0) - type: boolean - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + predicate: (CASE WHEN (((value like 'aaa%') or (value like 'vvv%'))) THEN (1) ELSE (0) END > 0) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string + expressions: UDFToInteger(key) (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -209,7 +239,48 @@ SELECT ELSE 0 end ) > 0 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL key)) user_id) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '2008-04-08') (> (TOK_FUNCTION WHEN (OR (LIKE (TOK_TABLE_OR_COL value) 'aaa%') (LIKE (TOK_TABLE_OR_COL value) 'vvv%')) 1 0) 0))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + TOK_INT + TOK_TABLE_OR_COL + key + user_id + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + and + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + > + TOK_FUNCTION + WHEN + OR + LIKE + TOK_TABLE_OR_COL + value + 'aaa%' + LIKE + TOK_TABLE_OR_COL + value + 'vvv%' + 1 + 0 + 0 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -218,36 +289,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart - Statistics: - numRows: 116 dataSize: 23248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((ds = '2008-04-08') and (CASE WHEN (((value like 'aaa%') or (value like 'vvv%'))) THEN (1) ELSE (0) END > 0)) - type: boolean - Statistics: - numRows: 38 dataSize: 7615 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((ds = '2008-04-08') and (CASE WHEN (((value like 'aaa%') or (value like 'vvv%'))) THEN (1) ELSE (0) END > 0)) (type: boolean) + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string + expressions: UDFToInteger(key) (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 38 dataSize: 7615 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 38 dataSize: 7615 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/ppr_pushdown3.q.out ql/src/test/results/clientpositive/ppr_pushdown3.q.out index 073e6f1..10b4ad2 100644 --- ql/src/test/results/clientpositive/ppr_pushdown3.q.out +++ ql/src/test/results/clientpositive/ppr_pushdown3.q.out @@ -2,9 +2,6 @@ PREHOOK: query: explain select * from srcpart where key < 10 PREHOOK: type: QUERY POSTHOOK: query: explain select * from srcpart where key < 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -12,28 +9,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -103,9 +92,6 @@ PREHOOK: query: explain select * from srcpart PREHOOK: type: QUERY POSTHOOK: query: explain select * from srcpart POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -116,17 +102,11 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select * from srcpart @@ -2149,9 +2129,6 @@ PREHOOK: query: explain select key from srcpart PREHOOK: type: QUERY POSTHOOK: query: explain select key from srcpart POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2159,18 +2136,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/push_or.q.out ql/src/test/results/clientpositive/push_or.q.out index 879f2c0..8864ebf 100644 --- ql/src/test/results/clientpositive/push_or.q.out +++ ql/src/test/results/clientpositive/push_or.q.out @@ -38,7 +38,44 @@ POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).value SIMPLE [(src)src.Field POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME push_or))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (or (= (TOK_TABLE_OR_COL ds) '2000-04-09') (= (TOK_TABLE_OR_COL key) 5))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + push_or + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + or + = + TOK_TABLE_OR_COL + ds + '2000-04-09' + = + TOK_TABLE_OR_COL + key + 5 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -47,48 +84,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - push_or + Map Operator Tree: TableScan alias: push_or - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((ds = '2000-04-09') or (key = 5)) - type: boolean - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((ds = '2000-04-09') or (key = 5)) (type: boolean) + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col2 - type: string + key expressions: _col0 (type: int), _col2 (type: string) sort order: ++ - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -180,15 +194,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out index 5fdb988..2f36c4e 100644 --- ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out +++ ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out @@ -76,9 +76,6 @@ POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._bucketname SIMPLE POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._count_of_l_shipdate EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._offsets EXPRESSION [(lineitem)lineitem.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__.l_shipdate SIMPLE [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME lineitem))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL l_shipdate)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL l_shipdate)))) (TOK_GROUPBY (TOK_TABLE_OR_COL l_shipdate)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -86,56 +83,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - lineitem + Map Operator Tree: TableScan alias: lineitem + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: l_shipdate - type: string + expressions: l_shipdate (type: string) outputColumnNames: l_shipdate + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(l_shipdate) - bucketGroup: false - keys: - expr: l_shipdate - type: string + aggregations: count(l_shipdate) + keys: l_shipdate (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -270,9 +251,6 @@ POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._bucketname SIMPLE POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._count_of_l_shipdate EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._offsets EXPRESSION [(lineitem)lineitem.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__.l_shipdate SIMPLE [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME lineitem))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL l_shipdate)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL l_shipdate)))) (TOK_GROUPBY (TOK_TABLE_OR_COL l_shipdate)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -280,58 +258,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - default__lineitem_lineitem_lshipdate_idx__ + Map Operator Tree: TableScan alias: default__lineitem_lineitem_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: l_shipdate - type: string - expr: _count_of_l_shipdate - type: bigint + expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) outputColumnNames: l_shipdate, _count_of_l_shipdate + Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_count_of_l_shipdate) + aggregations: sum(_count_of_l_shipdate) bucketGroup: true - keys: - expr: l_shipdate - type: string + keys: l_shipdate (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -472,9 +433,6 @@ POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._bucketname SIMPLE POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._count_of_l_shipdate EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._offsets EXPRESSION [(lineitem)lineitem.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__.l_shipdate SIMPLE [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME lineitem))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION year (TOK_TABLE_OR_COL l_shipdate)) year) (TOK_SELEXPR (TOK_FUNCTION month (TOK_TABLE_OR_COL l_shipdate)) month) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL l_shipdate)) monthly_shipments)) (TOK_GROUPBY (TOK_FUNCTION year (TOK_TABLE_OR_COL l_shipdate)) (TOK_FUNCTION month (TOK_TABLE_OR_COL l_shipdate))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL year)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL month))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -483,66 +441,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - lineitem + Map Operator Tree: TableScan alias: lineitem + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: l_shipdate - type: string + expressions: l_shipdate (type: string) outputColumnNames: l_shipdate + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(l_shipdate) - bucketGroup: false - keys: - expr: year(l_shipdate) - type: int - expr: month(l_shipdate) - type: int + aggregations: count(l_shipdate) + keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: bigint + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -550,29 +481,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: bigint + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -665,9 +586,6 @@ POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._bucketname SIMPLE POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._count_of_l_shipdate EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._offsets EXPRESSION [(lineitem)lineitem.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__.l_shipdate SIMPLE [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME lineitem))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION year (TOK_TABLE_OR_COL l_shipdate)) year) (TOK_SELEXPR (TOK_FUNCTION month (TOK_TABLE_OR_COL l_shipdate)) month) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL l_shipdate)) monthly_shipments)) (TOK_GROUPBY (TOK_FUNCTION year (TOK_TABLE_OR_COL l_shipdate)) (TOK_FUNCTION month (TOK_TABLE_OR_COL l_shipdate))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL year)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL month))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -676,68 +594,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - default__lineitem_lineitem_lshipdate_idx__ + Map Operator Tree: TableScan alias: default__lineitem_lineitem_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: l_shipdate - type: string - expr: _count_of_l_shipdate - type: bigint + expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) outputColumnNames: l_shipdate, _count_of_l_shipdate + Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_count_of_l_shipdate) - bucketGroup: false - keys: - expr: year(l_shipdate) - type: int - expr: month(l_shipdate) - type: int + aggregations: sum(_count_of_l_shipdate) + keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: bigint + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -745,29 +634,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: bigint + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -886,9 +765,6 @@ POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._bucketname SIMPLE POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._count_of_l_shipdate EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._offsets EXPRESSION [(lineitem)lineitem.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__.l_shipdate SIMPLE [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME lineitem))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION year (TOK_TABLE_OR_COL l_shipdate)) year) (TOK_SELEXPR (TOK_FUNCTION month (TOK_TABLE_OR_COL l_shipdate)) month) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL l_shipdate)) monthly_shipments)) (TOK_WHERE (= (TOK_FUNCTION year (TOK_TABLE_OR_COL l_shipdate)) 1997)) (TOK_GROUPBY (TOK_FUNCTION year (TOK_TABLE_OR_COL l_shipdate)) (TOK_FUNCTION month (TOK_TABLE_OR_COL l_shipdate))))) lastyear) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME lineitem))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION year (TOK_TABLE_OR_COL l_shipdate)) year) (TOK_SELEXPR (TOK_FUNCTION month (TOK_TABLE_OR_COL l_shipdate)) month) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL l_shipdate)) monthly_shipments)) (TOK_WHERE (= (TOK_FUNCTION year (TOK_TABLE_OR_COL l_shipdate)) 1998)) (TOK_GROUPBY (TOK_FUNCTION year (TOK_TABLE_OR_COL l_shipdate)) (TOK_FUNCTION month (TOK_TABLE_OR_COL l_shipdate))))) thisyear) (= (. (TOK_TABLE_OR_COL lastyear) month) (. (TOK_TABLE_OR_COL thisyear) month)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL lastyear) month)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL thisyear) month)) (TOK_SELEXPR (/ (- (. (TOK_TABLE_OR_COL thisyear) monthly_shipments) (. (TOK_TABLE_OR_COL lastyear) monthly_shipments)) (. (TOK_TABLE_OR_COL lastyear) monthly_shipments)) monthly_shipments_delta)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -898,70 +774,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - lastyear:default__lineitem_lineitem_lshipdate_idx__ + Map Operator Tree: TableScan alias: lastyear:default__lineitem_lineitem_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (year(l_shipdate) = 1997) - type: boolean + predicate: (year(l_shipdate) = 1997) (type: boolean) + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: l_shipdate - type: string - expr: _count_of_l_shipdate - type: bigint + expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) outputColumnNames: l_shipdate, _count_of_l_shipdate + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_count_of_l_shipdate) - bucketGroup: false - keys: - expr: year(l_shipdate) - type: int - expr: month(l_shipdate) - type: int + aggregations: sum(_count_of_l_shipdate) + keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: int - expr: _col2 - type: bigint + expressions: _col1 (type: int), _col2 (type: bigint) outputColumnNames: _col1, _col2 + Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -969,39 +817,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: int + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: - expr: _col1 - type: int - tag: 0 - value expressions: - expr: _col1 - type: int - expr: _col2 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: int + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: - expr: _col1 - type: int - tag: 1 - value expressions: - expr: _col1 - type: int - expr: _col2 - type: bigint + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -1009,20 +839,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} {VALUE._col2} 1 {VALUE._col1} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col1, _col2, _col4, _col5 + Statistics: Num rows: 25 Data size: 2966 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: int - expr: _col4 - type: int - expr: ((_col5 - _col2) / _col2) - type: decimal(38,19) + expressions: _col1 (type: int), _col4 (type: int), ((_col5 - _col2) / _col2) (type: decimal(38,19)) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 2966 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 25 Data size: 2966 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1030,70 +855,42 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - thisyear:default__lineitem_lineitem_lshipdate_idx__ + Map Operator Tree: TableScan alias: thisyear:default__lineitem_lineitem_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (year(l_shipdate) = 1998) - type: boolean + predicate: (year(l_shipdate) = 1998) (type: boolean) + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: l_shipdate - type: string - expr: _count_of_l_shipdate - type: bigint + expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) outputColumnNames: l_shipdate, _count_of_l_shipdate + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_count_of_l_shipdate) - bucketGroup: false - keys: - expr: year(l_shipdate) - type: int - expr: month(l_shipdate) - type: int + aggregations: sum(_count_of_l_shipdate) + keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: int - expr: _col2 - type: bigint + expressions: _col1 (type: int), _col2 (type: bigint) outputColumnNames: _col1, _col2 + Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1119,9 +916,6 @@ POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._bucketname SIMPLE POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._count_of_l_shipdate EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__._offsets EXPRESSION [(lineitem)lineitem.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__lineitem_lineitem_lshipdate_idx__.l_shipdate SIMPLE [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME lineitem))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL l_shipdate)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL l_shipdate)) cnt)) (TOK_GROUPBY (TOK_TABLE_OR_COL l_shipdate)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME lineitem))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL l_shipdate)) (TOK_SELEXPR (TOK_TABLE_OR_COL l_orderkey) cnt))))) dummy)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL l_shipdate)) (TOK_SELEXPR (TOK_TABLE_OR_COL cnt))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1130,58 +924,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:default__lineitem_lineitem_lshipdate_idx__ + Map Operator Tree: TableScan alias: null-subquery1:default__lineitem_lineitem_lshipdate_idx__ + Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: l_shipdate - type: string - expr: _count_of_l_shipdate - type: bigint + expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) outputColumnNames: l_shipdate, _count_of_l_shipdate + Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_count_of_l_shipdate) + aggregations: sum(_count_of_l_shipdate) bucketGroup: true - keys: - expr: l_shipdate - type: string + keys: l_shipdate (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1189,45 +965,37 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - null-subquery2:dummy-subquery2:lineitem TableScan alias: lineitem + Statistics: Num rows: 116 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: l_shipdate - type: string - expr: UDFToLong(l_orderkey) - type: bigint + expressions: l_shipdate (type: string), UDFToLong(l_orderkey) (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 116 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1283,9 +1051,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL key)))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1293,62 +1058,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - default__tbl_tbl_key_idx__ + Map Operator Tree: TableScan alias: default__tbl_tbl_key_idx__ + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (key = 1) - type: boolean + predicate: (key = 1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: _count_of_key - type: bigint + expressions: key (type: int), _count_of_key (type: bigint) outputColumnNames: key, _count_of_key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(_count_of_key) + aggregations: sum(_count_of_key) bucketGroup: true - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1370,9 +1117,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1380,58 +1124,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - default__tbl_tbl_key_idx__ + Map Operator Tree: TableScan alias: default__tbl_tbl_key_idx__ + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: _count_of_key - type: bigint + expressions: key (type: int), _count_of_key (type: bigint) outputColumnNames: key, _count_of_key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: sum(_count_of_key) + aggregations: sum(_count_of_key) bucketGroup: true - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1453,9 +1180,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1463,38 +1187,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1516,9 +1236,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1526,42 +1243,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(key) - bucketGroup: false + aggregations: count(key) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1583,9 +1294,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1593,47 +1301,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1655,9 +1353,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_GROUPBY (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1665,57 +1360,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: value - type: int - expr: key - type: int + expressions: value (type: int), key (type: int) outputColumnNames: value, key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: value - type: int - expr: key - type: int + keys: value (type: int), key (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: int + expressions: _col1 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1737,9 +1412,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 3)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1747,51 +1419,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (key = 3) - type: boolean + predicate: (key = 3) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1813,9 +1474,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (= (TOK_TABLE_OR_COL value) 2)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1823,51 +1481,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (value = 2) - type: boolean + predicate: (value = 2) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1889,9 +1536,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_FUNCTION substr (TOK_TABLE_OR_COL key) 2 3)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1899,55 +1543,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int - expr: substr(key, 2, 3) - type: string + keys: key (type: int), substr(key, 2, 3) (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: string + keys: KEY._col0 (type: int), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1969,9 +1595,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1979,59 +1602,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: value - type: int - expr: key - type: int + expressions: value (type: int), key (type: int) outputColumnNames: value, key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: value - type: int - expr: key - type: int + keys: value (type: int), key (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: int - expr: _col0 - type: int + expressions: _col1 (type: int), _col0 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2053,9 +1654,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL value) 1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2063,63 +1661,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (value = 1) - type: boolean + predicate: (value = 1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: int + expressions: key (type: int), value (type: int) outputColumnNames: key, value + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int - expr: value - type: int + keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int + expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2141,9 +1716,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2151,47 +1723,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2213,9 +1775,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2223,47 +1782,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2285,9 +1834,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2295,47 +1841,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2357,9 +1893,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2367,59 +1900,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: int + expressions: key (type: int), value (type: int) outputColumnNames: key, value + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int - expr: value - type: int + keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int + expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2441,9 +1952,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL value) 2)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2451,63 +1959,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (value = 2) - type: boolean + predicate: (value = 2) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: int + expressions: key (type: int), value (type: int) outputColumnNames: key, value + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int - expr: value - type: int + keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int + expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2529,9 +2014,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL value) 2) (= (TOK_TABLE_OR_COL key) 3))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2539,63 +2021,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: ((value = 2) and (key = 3)) - type: boolean + predicate: ((value = 2) and (key = 3)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: int + expressions: key (type: int), value (type: int) outputColumnNames: key, value + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int - expr: value - type: int + keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int + expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2617,9 +2076,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2627,63 +2083,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (value = key) - type: boolean + predicate: (value = key) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: int + expressions: key (type: int), value (type: int) outputColumnNames: key, value + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int - expr: value - type: int + keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int + expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2705,9 +2138,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 2 3))) (TOK_WHERE (= (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2715,63 +2145,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (value = key) - type: boolean + predicate: (value = key) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: int + expressions: key (type: int), value (type: int) outputColumnNames: key, value + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int - expr: substr(value, 2, 3) - type: string + keys: key (type: int), substr(value, 2, 3) (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: string + keys: KEY._col0 (type: int), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2793,9 +2200,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION substr (TOK_TABLE_OR_COL value) 2 3))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2803,59 +2207,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: int + expressions: key (type: int), value (type: int) outputColumnNames: key, value + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int - expr: substr(value, 2, 3) - type: string + keys: key (type: int), substr(value, 2, 3) (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: string + keys: KEY._col0 (type: int), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2877,9 +2259,6 @@ POSTHOOK: Lineage: default__tbl_tbl_key_idx__._bucketname SIMPLE [(tbl)tbl.Field POSTHOOK: Lineage: default__tbl_tbl_key_idx__._count_of_key EXPRESSION [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__._offsets EXPRESSION [(tbl)tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__tbl_tbl_key_idx__.key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) v1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL v1) value) 2)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2887,63 +2266,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - v1:tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (value = 2) - type: boolean + predicate: (value = 2) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: int + expressions: key (type: int), value (type: int) outputColumnNames: key, value + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int - expr: value - type: int + keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: int + key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: int - tag: -1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: int + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int + expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3149,9 +2505,6 @@ POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(srcpa POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tblpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL key)))) (TOK_WHERE (AND (AND (= (TOK_TABLE_OR_COL ds) '2008-04-09') (= (TOK_TABLE_OR_COL hr) 12)) (< (TOK_TABLE_OR_COL key) 10))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -3159,60 +2512,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tblpart + Map Operator Tree: TableScan alias: tblpart + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(key) - bucketGroup: false - keys: - expr: key - type: int + aggregations: count(key) + keys: key (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3372,9 +2708,6 @@ POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(srcpa POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tblpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL key)))) (TOK_WHERE (AND (AND (= (TOK_TABLE_OR_COL ds) '2008-04-09') (= (TOK_TABLE_OR_COL hr) 12)) (< (TOK_TABLE_OR_COL key) 10))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -3382,62 +2715,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - default__tblpart_tbl_part_index__ + Map Operator Tree: TableScan alias: default__tblpart_tbl_part_index__ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: _count_of_key - type: bigint + expressions: key (type: int), _count_of_key (type: bigint) outputColumnNames: key, _count_of_key + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_count_of_key) - bucketGroup: false - keys: - expr: key - type: int + aggregations: sum(_count_of_key) + keys: key (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3719,9 +3033,6 @@ POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(srcpa POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3730,56 +3041,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tbl + Map Operator Tree: TableScan alias: tbl + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(key) - bucketGroup: false - keys: - expr: key - type: int + aggregations: count(key) + keys: key (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3787,25 +3081,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3905,9 +3193,6 @@ POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(srcpa POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tblpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3916,58 +3201,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - default__tbl_tbl_key_idx__ + Map Operator Tree: TableScan alias: default__tbl_tbl_key_idx__ + Statistics: Num rows: 6 Data size: 586 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: _count_of_key - type: bigint + expressions: key (type: int), _count_of_key (type: bigint) outputColumnNames: key, _count_of_key + Statistics: Num rows: 6 Data size: 586 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_count_of_key) + aggregations: sum(_count_of_key) bucketGroup: true - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 586 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 586 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3975,25 +3242,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/query_result_fileformat.q.out ql/src/test/results/clientpositive/query_result_fileformat.q.out index 741eaf1..644ac0c 100644 --- ql/src/test/results/clientpositive/query_result_fileformat.q.out +++ ql/src/test/results/clientpositive/query_result_fileformat.q.out @@ -38,9 +38,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from nzhang_test1 where key='key1' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME nzhang_test1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 'key1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -48,24 +45,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - nzhang_test1 + Map Operator Tree: TableScan alias: nzhang_test1 + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 'key1') - type: boolean + predicate: (key = 'key1') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -114,9 +107,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from nzhang_test1 where key='key1' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME nzhang_test1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 'key1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -124,24 +114,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - nzhang_test1 + Map Operator Tree: TableScan alias: nzhang_test1 + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 'key1') - type: boolean + predicate: (key = 'key1') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/quote1.q.out ql/src/test/results/clientpositive/quote1.q.out index b8f70bc..6e76fe2 100644 --- ql/src/test/results/clientpositive/quote1.q.out +++ ql/src/test/results/clientpositive/quote1.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 PARTITION(`table`='2008-04-08') SELECT src.key as `partition`, src.value as `from` WHERE src.key >= 200 and src.key < 300 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1) (TOK_PARTSPEC (TOK_PARTVAL table '2008-04-08')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) partition) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) from)) (TOK_WHERE (and (>= (. (TOK_TABLE_OR_COL src) key) 200) (< (. (TOK_TABLE_OR_COL src) key) 300))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -27,24 +24,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key >= 200) and (key < 300)) - type: boolean + predicate: ((key >= 200) and (key < 300)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string + expressions: UDFToInteger(key) (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -77,12 +70,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -91,12 +82,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -115,9 +104,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT `int`.`location`, `int`.`type`, `int`.`table` FROM dest1 `int` WHERE `int`.`table` = '2008-04-08' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1) int)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL int) location)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL int) type)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL int) table))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL int) table) '2008-04-08')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -125,30 +111,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - int - TableScan - alias: int - Filter Operator - predicate: - expr: (table = '2008-04-08') - type: boolean - Select Operator - expressions: - expr: location - type: int - expr: type - type: string - expr: table - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/quote2.q.out ql/src/test/results/clientpositive/quote2.q.out index e85a6ca..f84f058 100644 --- ql/src/test/results/clientpositive/quote2.q.out +++ ql/src/test/results/clientpositive/quote2.q.out @@ -28,9 +28,6 @@ SELECT FROM src LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'abc') (TOK_SELEXPR "abc") (TOK_SELEXPR 'abc\'') (TOK_SELEXPR "abc\"") (TOK_SELEXPR 'abc\\') (TOK_SELEXPR "abc\\") (TOK_SELEXPR 'abc\\\'') (TOK_SELEXPR "abc\\\"") (TOK_SELEXPR 'abc\\\\') (TOK_SELEXPR "abc\\\\") (TOK_SELEXPR 'abc\\\\\'') (TOK_SELEXPR "abc\\\\\"") (TOK_SELEXPR 'abc\\\\\\') (TOK_SELEXPR "abc\\\\\\") (TOK_SELEXPR 'abc""""\\') (TOK_SELEXPR "abc''''\\") (TOK_SELEXPR "awk '{print NR\"\\t\"$0}'") (TOK_SELEXPR 'tab\ttab') (TOK_SELEXPR "tab\ttab")) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -41,48 +38,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: 'abc' - type: string - expr: 'abc' - type: string - expr: 'abc'' - type: string - expr: 'abc"' - type: string - expr: 'abc\' - type: string - expr: 'abc\' - type: string - expr: 'abc\'' - type: string - expr: 'abc\"' - type: string - expr: 'abc\\' - type: string - expr: 'abc\\' - type: string - expr: 'abc\\'' - type: string - expr: 'abc\\"' - type: string - expr: 'abc\\\' - type: string - expr: 'abc\\\' - type: string - expr: 'abc""""\' - type: string - expr: 'abc''''\' - type: string - expr: 'awk '{print NR"\t"$0}'' - type: string - expr: 'tab tab' - type: string - expr: 'tab tab' - type: string + expressions: 'abc' (type: string), 'abc' (type: string), 'abc'' (type: string), 'abc"' (type: string), 'abc\' (type: string), 'abc\' (type: string), 'abc\'' (type: string), 'abc\"' (type: string), 'abc\\' (type: string), 'abc\\' (type: string), 'abc\\'' (type: string), 'abc\\"' (type: string), 'abc\\\' (type: string), 'abc\\\' (type: string), 'abc""""\' (type: string), 'abc''''\' (type: string), 'awk '{print NR"\t"$0}'' (type: string), 'tab tab' (type: string), 'tab tab' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT diff --git ql/src/test/results/clientpositive/quotedid_basic.q.out ql/src/test/results/clientpositive/quotedid_basic.q.out index d33637a..28d1072 100644 --- ql/src/test/results/clientpositive/quotedid_basic.q.out +++ ql/src/test/results/clientpositive/quotedid_basic.q.out @@ -24,9 +24,6 @@ PREHOOK: query: explain select `x+1`, `y&y`, `!@#$%^&*()_q` from t1 PREHOOK: type: QUERY POSTHOOK: query: explain select `x+1`, `y&y`, `!@#$%^&*()_q` from t1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x+1)) (TOK_SELEXPR (TOK_TABLE_OR_COL y&y)) (TOK_SELEXPR (TOK_TABLE_OR_COL !@#$%^&*()_q))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -34,22 +31,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: x+1 - type: string - expr: y&y - type: string - expr: !@#$%^&*()_q - type: string + expressions: x+1 (type: string), y&y (type: string), !@#$%^&*()_q (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -63,9 +55,6 @@ PREHOOK: query: explain select `x+1`, `y&y`, `!@#$%^&*()_q` from t1 where `!@#$% PREHOOK: type: QUERY POSTHOOK: query: explain select `x+1`, `y&y`, `!@#$%^&*()_q` from t1 where `!@#$%^&*()_q` = '1' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x+1)) (TOK_SELEXPR (TOK_TABLE_OR_COL y&y)) (TOK_SELEXPR (TOK_TABLE_OR_COL !@#$%^&*()_q))) (TOK_WHERE (= (TOK_TABLE_OR_COL !@#$%^&*()_q) '1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -73,26 +62,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (!@#$%^&*()_q = '1') - type: boolean + predicate: (!@#$%^&*()_q = '1') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: x+1 - type: string - expr: y&y - type: string - expr: !@#$%^&*()_q - type: string + expressions: x+1 (type: string), y&y (type: string), !@#$%^&*()_q (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -106,9 +89,6 @@ PREHOOK: query: explain select `x+1`, `y&y`, `!@#$%^&*()_q` from t1 where `!@#$% PREHOOK: type: QUERY POSTHOOK: query: explain select `x+1`, `y&y`, `!@#$%^&*()_q` from t1 where `!@#$%^&*()_q` = '1' group by `x+1`, `y&y`, `!@#$%^&*()_q` having `!@#$%^&*()_q` = '1' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x+1)) (TOK_SELEXPR (TOK_TABLE_OR_COL y&y)) (TOK_SELEXPR (TOK_TABLE_OR_COL !@#$%^&*()_q))) (TOK_WHERE (= (TOK_TABLE_OR_COL !@#$%^&*()_q) '1')) (TOK_GROUPBY (TOK_TABLE_OR_COL x+1) (TOK_TABLE_OR_COL y&y) (TOK_TABLE_OR_COL !@#$%^&*()_q)) (TOK_HAVING (= (TOK_TABLE_OR_COL !@#$%^&*()_q) '1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -116,75 +96,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (!@#$%^&*()_q = '1') - type: boolean + predicate: (!@#$%^&*()_q = '1') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: x+1 - type: string - expr: y&y - type: string - expr: !@#$%^&*()_q - type: string + expressions: x+1 (type: string), y&y (type: string), !@#$%^&*()_q (type: string) outputColumnNames: x+1, y&y, !@#$%^&*()_q + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: x+1 - type: string - expr: y&y - type: string - expr: !@#$%^&*()_q - type: string + keys: x+1 (type: string), y&y (type: string), !@#$%^&*()_q (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -200,9 +145,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select `x+1`, `y&y`, `!@#$%^&*()_q`, rank() over(partition by `!@#$%^&*()_q` order by `y&y`) from t1 where `!@#$%^&*()_q` = '1' group by `x+1`, `y&y`, `!@#$%^&*()_q` having `!@#$%^&*()_q` = '1' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x+1)) (TOK_SELEXPR (TOK_TABLE_OR_COL y&y)) (TOK_SELEXPR (TOK_TABLE_OR_COL !@#$%^&*()_q)) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL !@#$%^&*()_q)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL y&y)))))))) (TOK_WHERE (= (TOK_TABLE_OR_COL !@#$%^&*()_q) '1')) (TOK_GROUPBY (TOK_TABLE_OR_COL x+1) (TOK_TABLE_OR_COL y&y) (TOK_TABLE_OR_COL !@#$%^&*()_q)) (TOK_HAVING (= (TOK_TABLE_OR_COL !@#$%^&*()_q) '1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -211,66 +153,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (!@#$%^&*()_q = '1') - type: boolean + predicate: (!@#$%^&*()_q = '1') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: x+1 - type: string - expr: y&y - type: string - expr: !@#$%^&*()_q - type: string + expressions: x+1 (type: string), y&y (type: string), !@#$%^&*()_q (type: string) outputColumnNames: x+1, y&y, !@#$%^&*()_q + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: x+1 - type: string - expr: y&y - type: string - expr: !@#$%^&*()_q - type: string + keys: x+1 (type: string), y&y (type: string), !@#$%^&*()_q (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -278,44 +189,26 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col2 - type: string - expr: _col1 - type: string + key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE PTF Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _wcol0 - type: int + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _wcol0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -333,9 +226,6 @@ POSTHOOK: query: -- case insensitive explain select `X+1`, `Y&y`, `!@#$%^&*()_Q`, rank() over(partition by `!@#$%^&*()_q` order by `y&y`) from t1 where `!@#$%^&*()_q` = '1' group by `x+1`, `y&Y`, `!@#$%^&*()_q` having `!@#$%^&*()_Q` = '1' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL X+1)) (TOK_SELEXPR (TOK_TABLE_OR_COL Y&y)) (TOK_SELEXPR (TOK_TABLE_OR_COL !@#$%^&*()_Q)) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL !@#$%^&*()_q)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL y&y)))))))) (TOK_WHERE (= (TOK_TABLE_OR_COL !@#$%^&*()_q) '1')) (TOK_GROUPBY (TOK_TABLE_OR_COL x+1) (TOK_TABLE_OR_COL y&Y) (TOK_TABLE_OR_COL !@#$%^&*()_q)) (TOK_HAVING (= (TOK_TABLE_OR_COL !@#$%^&*()_Q) '1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -344,66 +234,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: - expr: (!@#$%^&*()_q = '1') - type: boolean + predicate: (!@#$%^&*()_q = '1') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: x+1 - type: string - expr: y&y - type: string - expr: !@#$%^&*()_q - type: string + expressions: x+1 (type: string), y&y (type: string), !@#$%^&*()_q (type: string) outputColumnNames: x+1, y&y, !@#$%^&*()_q + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: x+1 - type: string - expr: y&y - type: string - expr: !@#$%^&*()_q - type: string + keys: x+1 (type: string), y&y (type: string), !@#$%^&*()_q (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -411,44 +270,26 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col2 - type: string - expr: _col1 - type: string + key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col2 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE PTF Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _wcol0 - type: int + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _wcol0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/quotedid_partition.q.out ql/src/test/results/clientpositive/quotedid_partition.q.out index 5f72d2a..7e13f20 100644 --- ql/src/test/results/clientpositive/quotedid_partition.q.out +++ ql/src/test/results/clientpositive/quotedid_partition.q.out @@ -30,9 +30,6 @@ group by `x+1`, `y&y`, `!@#$%^&*()_q` having `!@#$%^&*()_q` = 'a' POSTHOOK: type: QUERY POSTHOOK: Lineage: src_p PARTITION(!@#$%^&*()_q=a).x+1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_p PARTITION(!@#$%^&*()_q=a).y&y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_p))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x+1)) (TOK_SELEXPR (TOK_TABLE_OR_COL y&y)) (TOK_SELEXPR (TOK_TABLE_OR_COL !@#$%^&*()_q))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL !@#$%^&*()_q) 'a') (= (TOK_TABLE_OR_COL x+1) '10'))) (TOK_GROUPBY (TOK_TABLE_OR_COL x+1) (TOK_TABLE_OR_COL y&y) (TOK_TABLE_OR_COL !@#$%^&*()_q)) (TOK_HAVING (= (TOK_TABLE_OR_COL !@#$%^&*()_q) 'a')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -40,75 +37,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_p + Map Operator Tree: TableScan alias: src_p + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (x+1 = '10') - type: boolean + predicate: (x+1 = '10') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: x+1 - type: string - expr: y&y - type: string - expr: !@#$%^&*()_q - type: string + expressions: x+1 (type: string), y&y (type: string), !@#$%^&*()_q (type: string) outputColumnNames: x+1, y&y, !@#$%^&*()_q + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: x+1 - type: string - expr: y&y - type: string - expr: !@#$%^&*()_q - type: string + keys: x+1 (type: string), y&y (type: string), !@#$%^&*()_q (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string - expr: KEY._col2 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/quotedid_skew.q.out ql/src/test/results/clientpositive/quotedid_skew.q.out index 7985ea7..90b18d3 100644 --- ql/src/test/results/clientpositive/quotedid_skew.q.out +++ ql/src/test/results/clientpositive/quotedid_skew.q.out @@ -36,9 +36,6 @@ POSTHOOK: query: -- a simple join query with skew on both the tables on the join EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a. `!@#$%^&*()_q` = b. `!@#$%^&*()_q` POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) !@#$%^&*()_q) (. (TOK_TABLE_OR_COL b) !@#$%^&*()_q)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -48,49 +45,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (!@#$%^&*()_q = '2')) - type: boolean + predicate: (not (!@#$%^&*()_q = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: !@#$%^&*()_q - type: string + key expressions: !@#$%^&*()_q (type: string) sort order: + - Map-reduce partition columns: - expr: !@#$%^&*()_q - type: string - tag: 0 - value expressions: - expr: !@#$%^&*()_q - type: string - expr: y&y - type: string - subquery1:b + Map-reduce partition columns: !@#$%^&*()_q (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: !@#$%^&*()_q (type: string), y&y (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (!@#$%^&*()_q = '2')) - type: boolean + predicate: (not (!@#$%^&*()_q = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: !@#$%^&*()_q - type: string + key expressions: !@#$%^&*()_q (type: string) sort order: + - Map-reduce partition columns: - expr: !@#$%^&*()_q - type: string - tag: 1 - value expressions: - expr: !@#$%^&*()_q - type: string - expr: y&y - type: string + Map-reduce partition columns: !@#$%^&*()_q (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: !@#$%^&*()_q (type: string), y&y (type: string) Reduce Operator Tree: Join Operator condition map: @@ -98,22 +77,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -121,27 +92,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -149,49 +122,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (!@#$%^&*()_q = '2') - type: boolean + predicate: (!@#$%^&*()_q = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: !@#$%^&*()_q - type: string + key expressions: !@#$%^&*()_q (type: string) sort order: + - Map-reduce partition columns: - expr: !@#$%^&*()_q - type: string - tag: 0 - value expressions: - expr: !@#$%^&*()_q - type: string - expr: y&y - type: string - b + Map-reduce partition columns: !@#$%^&*()_q (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: !@#$%^&*()_q (type: string), y&y (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (!@#$%^&*()_q = '2') - type: boolean + predicate: (!@#$%^&*()_q = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: !@#$%^&*()_q - type: string + key expressions: !@#$%^&*()_q (type: string) sort order: + - Map-reduce partition columns: - expr: !@#$%^&*()_q - type: string - tag: 1 - value expressions: - expr: !@#$%^&*()_q - type: string - expr: y&y - type: string + Map-reduce partition columns: !@#$%^&*()_q (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: !@#$%^&*()_q (type: string), y&y (type: string) Reduce Operator Tree: Join Operator condition map: @@ -199,22 +154,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/rand_partitionpruner1.q.out ql/src/test/results/clientpositive/rand_partitionpruner1.q.out index 5a6697d..ec5540b 100644 --- ql/src/test/results/clientpositive/rand_partitionpruner1.q.out +++ ql/src/test/results/clientpositive/rand_partitionpruner1.q.out @@ -5,7 +5,26 @@ POSTHOOK: query: -- scanning un-partitioned data explain extended select * from src where rand(1) < 0.1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_FUNCTION rand 1) 0.1)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + < + TOK_FUNCTION + rand + 1 + 0.1 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -14,36 +33,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (rand(1) < 0.1) - type: boolean - Statistics: - numRows: 9 dataSize: 1803 basicStatsState: COMPLETE colStatsState: NONE + predicate: (rand(1) < 0.1) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 9 dataSize: 1803 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 9 dataSize: 1803 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/rand_partitionpruner2.q.out ql/src/test/results/clientpositive/rand_partitionpruner2.q.out index cd9746f..1b3c962 100644 --- ql/src/test/results/clientpositive/rand_partitionpruner2.q.out +++ ql/src/test/results/clientpositive/rand_partitionpruner2.q.out @@ -16,7 +16,37 @@ insert overwrite table tmptable select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME tmptable))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))) (TOK_WHERE (and (< (TOK_FUNCTION rand 1) 0.1) (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + a + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + tmptable + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + a + TOK_WHERE + and + < + TOK_FUNCTION + rand + 1 + 0.1 + = + . + TOK_TABLE_OR_COL + a + ds + '2008-04-08' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -31,40 +61,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (rand(1) < 0.1) - type: boolean - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + predicate: (rand(1) < 0.1) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -211,8 +226,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -278,8 +292,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/rand_partitionpruner3.q.out ql/src/test/results/clientpositive/rand_partitionpruner3.q.out index 27d2845..321067e 100644 --- ql/src/test/results/clientpositive/rand_partitionpruner3.q.out +++ ql/src/test/results/clientpositive/rand_partitionpruner3.q.out @@ -7,7 +7,54 @@ POSTHOOK: query: -- complex predicates in the where clause explain extended select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))) (TOK_WHERE (and (and (and (< (TOK_FUNCTION rand 1) 0.1) (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08')) (not (or (> (TOK_TABLE_OR_COL key) 50) (< (TOK_TABLE_OR_COL key) 10)))) (like (. (TOK_TABLE_OR_COL a) hr) '%2'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + a + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + a + TOK_WHERE + and + and + and + < + TOK_FUNCTION + rand + 1 + 0.1 + = + . + TOK_TABLE_OR_COL + a + ds + '2008-04-08' + not + or + > + TOK_TABLE_OR_COL + key + 50 + < + TOK_TABLE_OR_COL + key + 10 + like + . + TOK_TABLE_OR_COL + a + hr + '%2' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -16,40 +63,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((rand(1) < 0.1) and (not ((key > 50) or (key < 10)))) - type: boolean - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((rand(1) < 0.1) and (not ((key > 50) or (key < 10)))) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -139,7 +171,48 @@ POSTHOOK: query: -- without rand for comparison explain extended select a.* from srcpart a where a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08') (not (or (> (TOK_TABLE_OR_COL key) 50) (< (TOK_TABLE_OR_COL key) 10)))) (like (. (TOK_TABLE_OR_COL a) hr) '%2'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + a + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + a + TOK_WHERE + and + and + = + . + TOK_TABLE_OR_COL + a + ds + '2008-04-08' + not + or + > + TOK_TABLE_OR_COL + key + 50 + < + TOK_TABLE_OR_COL + key + 10 + like + . + TOK_TABLE_OR_COL + a + hr + '%2' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -148,40 +221,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (not ((key > 50) or (key < 10))) - type: boolean - Statistics: - numRows: 11 dataSize: 2204 basicStatsState: COMPLETE colStatsState: NONE + predicate: (not ((key > 50) or (key < 10))) (type: boolean) + Statistics: Num rows: 11 Data size: 2204 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 11 dataSize: 2204 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 11 Data size: 2204 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 11 dataSize: 2204 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 11 Data size: 2204 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/rcfile_createas1.q.out ql/src/test/results/clientpositive/rcfile_createas1.q.out index 7111b23..5f7814c 100644 --- ql/src/test/results/clientpositive/rcfile_createas1.q.out +++ ql/src/test/results/clientpositive/rcfile_createas1.q.out @@ -55,9 +55,6 @@ POSTHOOK: Lineage: rcfile_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.Fi POSTHOOK: Lineage: rcfile_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_createas1a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME rcfile_createas1b) TOK_LIKETABLE TOK_TBLRCFILE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME rcfile_createas1a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION PMOD (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key)) 50) part))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -72,22 +69,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - rcfile_createas1a + Map Operator Tree: TableScan alias: rcfile_createas1a + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: (hash(key) pmod 50) - type: int + expressions: key (type: int), value (type: string), (hash(key) pmod 50) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat @@ -113,13 +105,10 @@ STAGE PLANS: Create Table Operator: Create Table columns: key int, value string, part int - if not exists: false input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde name: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: rcfile_createas1b - isExternal: false Stage: Stage-2 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/rcfile_merge1.q.out ql/src/test/results/clientpositive/rcfile_merge1.q.out index 52c3d28..cae31b6 100644 --- ql/src/test/results/clientpositive/rcfile_merge1.q.out +++ ql/src/test/results/clientpositive/rcfile_merge1.q.out @@ -32,9 +32,6 @@ EXPLAIN SELECT key, value, PMOD(HASH(key), 100) as part FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME rcfile_merge1) (TOK_PARTSPEC (TOK_PARTVAL ds '1') (TOK_PARTVAL part)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION PMOD (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key)) 100) part)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -48,22 +45,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string - expr: (hash(key) pmod 100) - type: int + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 100) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat @@ -97,12 +89,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat @@ -111,12 +101,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat @@ -645,9 +633,6 @@ POSTHOOK: Lineage: rcfile_merge1 PARTITION(ds=1,part=99).key EXPRESSION [(src)sr POSTHOOK: Lineage: rcfile_merge1 PARTITION(ds=1,part=99).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge1 PARTITION(ds=1,part=9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge1 PARTITION(ds=1,part=9).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME rcfile_merge1b) (TOK_PARTSPEC (TOK_PARTVAL ds '1') (TOK_PARTVAL part)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION PMOD (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key)) 100) part)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -661,22 +646,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string - expr: (hash(key) pmod 100) - type: int + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 100) (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat diff --git ql/src/test/results/clientpositive/rcfile_merge2.q.out ql/src/test/results/clientpositive/rcfile_merge2.q.out index 04f04fe..8b88486 100644 --- ql/src/test/results/clientpositive/rcfile_merge2.q.out +++ ql/src/test/results/clientpositive/rcfile_merge2.q.out @@ -21,9 +21,6 @@ POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE rcfile_merge2a PARTITION (one='1 PMOD(HASH(value), 10) as three FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME rcfile_merge2a) (TOK_PARTSPEC (TOK_PARTVAL one '1') (TOK_PARTVAL two) (TOK_PARTVAL three)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION PMOD (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key)) 10) two) (TOK_SELEXPR (TOK_FUNCTION PMOD (TOK_FUNCTION HASH (TOK_TABLE_OR_COL value)) 10) three)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -37,24 +34,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string - expr: (hash(key) pmod 10) - type: int - expr: (hash(value) pmod 10) - type: int + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat diff --git ql/src/test/results/clientpositive/rcfile_merge3.q.out ql/src/test/results/clientpositive/rcfile_merge3.q.out index 02800a3..9d0cae4 100644 --- ql/src/test/results/clientpositive/rcfile_merge3.q.out +++ ql/src/test/results/clientpositive/rcfile_merge3.q.out @@ -54,9 +54,6 @@ POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).key EXPRESSION [(src)src.Field POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME rcfile_merge3a))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME rcfile_merge3b))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -70,20 +67,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - rcfile_merge3a + Map Operator Tree: TableScan alias: rcfile_merge3a + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat diff --git ql/src/test/results/clientpositive/rcfile_merge4.q.out ql/src/test/results/clientpositive/rcfile_merge4.q.out index 7ea68f8..df27acc 100644 --- ql/src/test/results/clientpositive/rcfile_merge4.q.out +++ ql/src/test/results/clientpositive/rcfile_merge4.q.out @@ -54,9 +54,6 @@ POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).key EXPRESSION [(src)src.Field POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME rcfile_merge3a))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME rcfile_merge3b))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -70,20 +67,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - rcfile_merge3a + Map Operator Tree: TableScan alias: rcfile_merge3a + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -114,12 +108,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -128,12 +120,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/rcfile_null_value.q.out ql/src/test/results/clientpositive/rcfile_null_value.q.out index 83905d3..9c795db 100644 --- ql/src/test/results/clientpositive/rcfile_null_value.q.out +++ ql/src/test/results/clientpositive/rcfile_null_value.q.out @@ -87,9 +87,6 @@ INSERT OVERWRITE TABLE dest1_rc SELECT c.c1, c.c2, c.c3, c.c4 POSTHOOK: type: QUERY POSTHOOK: Lineage: src1_rc.key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src1_rc.value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) src1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) value) c2)) (TOK_WHERE (and (> (. (TOK_TABLE_OR_COL src1) key) 10) (< (. (TOK_TABLE_OR_COL src1) key) 20))))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) src2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) key) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) value) c4)) (TOK_WHERE (and (> (. (TOK_TABLE_OR_COL src2) key) 15) (< (. (TOK_TABLE_OR_COL src2) key) 25))))) b) (= (. (TOK_TABLE_OR_COL a) c1) (. (TOK_TABLE_OR_COL b) c3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) c1) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) c2) c2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) c3) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) c4) c4)))) c)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1_rc))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) c1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) c2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) c3)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) c4))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -98,63 +95,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c:a:src1 + Map Operator Tree: TableScan alias: src1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 10) and (key < 20)) - type: boolean + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - c:b:src2 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 15) and (key < 25)) - type: boolean + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -162,22 +135,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string - expr: UDFToInteger(_col2) - type: int - expr: _col3 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 661 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat diff --git ql/src/test/results/clientpositive/reduce_deduplicate.q.out ql/src/test/results/clientpositive/reduce_deduplicate.q.out index 8211cf6..03cc441 100644 --- ql/src/test/results/clientpositive/reduce_deduplicate.q.out +++ ql/src/test/results/clientpositive/reduce_deduplicate.q.out @@ -12,7 +12,24 @@ insert overwrite table bucket5_1 select * from src cluster by key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucket5_1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucket5_1 + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_CLUSTERBY + TOK_TABLE_OR_COL + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -22,38 +39,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + value expressions: _col0 (type: string), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -104,24 +105,17 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 2 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -252,7 +246,131 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: bucket5_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bucket5_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME complex_tbl_2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_TABLE_OR_COL aet) (TOK_TABLE_OR_COL aes)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_TABCOLLIST (TOK_TABCOL aid TOK_STRING) (TOK_TABCOL bid TOK_STRING) (TOK_TABCOL t TOK_INT) (TOK_TABCOL ctime TOK_STRING) (TOK_TABCOL etime TOK_BIGINT) (TOK_TABCOL l TOK_STRING) (TOK_TABCOL et TOK_STRING))))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2010-03-29')) (TOK_CLUSTERBY (TOK_TABLE_OR_COL bid)))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_TABLE_OR_COL aid) (TOK_TABLE_OR_COL bid) (TOK_TABLE_OR_COL t) (TOK_TABLE_OR_COL ctime) (TOK_TABLE_OR_COL etime) (TOK_TABLE_OR_COL l) (TOK_TABLE_OR_COL et)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_TABCOLLIST (TOK_TABCOL aid TOK_STRING) (TOK_TABCOL bid TOK_STRING) (TOK_TABCOL t TOK_INT) (TOK_TABCOL ctime TOK_STRING) (TOK_TABCOL etime TOK_BIGINT) (TOK_TABCOL l TOK_STRING) (TOK_TABCOL et TOK_STRING))))))) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME complex_tbl_1) (TOK_PARTSPEC (TOK_PARTVAL ds '2010-03-29')))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s2)))))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + complex_tbl_2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TRANSFORM + TOK_EXPLIST + TOK_TABLE_OR_COL + aet + TOK_TABLE_OR_COL + aes + TOK_SERDE + TOK_RECORDWRITER + 'cat' + TOK_SERDE + TOK_RECORDREADER + TOK_TABCOLLIST + TOK_TABCOL + aid + TOK_STRING + TOK_TABCOL + bid + TOK_STRING + TOK_TABCOL + t + TOK_INT + TOK_TABCOL + ctime + TOK_STRING + TOK_TABCOL + etime + TOK_BIGINT + TOK_TABCOL + l + TOK_STRING + TOK_TABCOL + et + TOK_STRING + TOK_WHERE + = + TOK_TABLE_OR_COL + ds + '2010-03-29' + TOK_CLUSTERBY + TOK_TABLE_OR_COL + bid + s + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TRANSFORM + TOK_EXPLIST + TOK_TABLE_OR_COL + aid + TOK_TABLE_OR_COL + bid + TOK_TABLE_OR_COL + t + TOK_TABLE_OR_COL + ctime + TOK_TABLE_OR_COL + etime + TOK_TABLE_OR_COL + l + TOK_TABLE_OR_COL + et + TOK_SERDE + TOK_RECORDWRITER + 'cat' + TOK_SERDE + TOK_RECORDREADER + TOK_TABCOLLIST + TOK_TABCOL + aid + TOK_STRING + TOK_TABCOL + bid + TOK_STRING + TOK_TABCOL + t + TOK_INT + TOK_TABCOL + ctime + TOK_STRING + TOK_TABCOL + etime + TOK_BIGINT + TOK_TABCOL + l + TOK_STRING + TOK_TABCOL + et + TOK_STRING + s2 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + complex_tbl_1 + TOK_PARTSPEC + TOK_PARTVAL + ds + '2010-03-29' + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + s2 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -262,93 +380,14 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s2:s:complex_tbl_2 - TableScan - alias: complex_tbl_2 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (ds = '2010-03-29') - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Select Operator - expressions: - expr: aet - type: string - expr: aes - type: string - outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Transform Operator - command: cat - output info: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string,string,int,string,bigint,string,string - field.delim 9 - serialization.format 9 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Reduce Output Operator - key expressions: - expr: _col1 - type: string - sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - expr: _col4 - type: bigint - expr: _col5 - type: string - expr: _col6 - type: string Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - expr: _col4 - type: bigint - expr: _col5 - type: string - expr: _col6 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Transform Operator command: cat output info: @@ -361,35 +400,18 @@ STAGE PLANS: serialization.format 9 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - expr: _col4 - type: bigint - expr: _col5 - type: string - expr: _col6 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Static Partition Specification: ds=2010-03-29/ - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/reduce_deduplicate_exclude_join.q.out ql/src/test/results/clientpositive/reduce_deduplicate_exclude_join.q.out index 2ac20b4..7d1d1b9 100644 --- ql/src/test/results/clientpositive/reduce_deduplicate_exclude_join.q.out +++ ql/src/test/results/clientpositive/reduce_deduplicate_exclude_join.q.out @@ -2,9 +2,6 @@ PREHOOK: query: explain select * from (select * from src cluster by key) a join PREHOOK: type: QUERY POSTHOOK: query: explain select * from (select * from src cluster by key) a join src b on a.key = b.key limit 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -13,36 +10,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -50,8 +36,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -59,27 +44,21 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] + 0 _col0 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Position of Big Table: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -94,6 +73,7 @@ STAGE PLANS: b TableScan alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out index 7ad96ed..9c6d14e 100644 --- ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out +++ ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out @@ -12,9 +12,6 @@ POSTHOOK: query: -- HIVE-2340 deduplicate RS followed by RS -- RS-mGBY-RS-rGBY explain select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) Q1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -22,59 +19,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - q1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col0) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: sum(_col0) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -88,9 +66,6 @@ PREHOOK: query: explain select key, sum(key), lower(value) from (select * from s PREHOOK: type: QUERY POSTHOOK: query: explain select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) Q1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION lower (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_FUNCTION lower (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -98,67 +73,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - q1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: lower(_col1) - type: string + key expressions: _col0 (type: string), lower(_col1) (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: lower(_col1) - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string), lower(_col1) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col0) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: lower(_col1) - type: string + aggregations: sum(_col0) + keys: _col0 (type: string), lower(_col1) (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double - expr: _col1 - type: string + expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -172,9 +120,6 @@ PREHOOK: query: explain select key, sum(key), (X + 1) from (select key, (value + PREHOOK: type: QUERY POSTHOOK: query: explain select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (+ (TOK_TABLE_OR_COL value) 1) X)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) Q1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (+ (TOK_TABLE_OR_COL X) 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (+ (TOK_TABLE_OR_COL X) 1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -182,67 +127,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - q1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: (value + 1) - type: double + expressions: key (type: string), (value + 1) (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: (_col1 + 1) - type: double + key expressions: _col0 (type: string), (_col1 + 1) (type: double) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: (_col1 + 1) - type: double - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string), (_col1 + 1) (type: double) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col0) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: (_col1 + 1) - type: double + aggregations: sum(_col0) + keys: _col0 (type: string), (_col1 + 1) (type: double) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double - expr: _col1 - type: double + expressions: _col0 (type: string), _col2 (type: double), _col1 (type: double) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -258,9 +176,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- mGBY-RS-rGBY-RS explain select key, sum(key) as value from src group by key order by key, value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key)) value)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -268,58 +183,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(key) - bucketGroup: false - keys: - expr: key - type: string + aggregations: sum(key) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: double + key expressions: _col0 (type: string), _col1 (type: double) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -335,9 +232,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- RS-JOIN-mGBY-RS-rGBY explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (. (TOK_TABLE_OR_COL src) key)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -346,36 +240,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan - alias: src + alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - src1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE TableScan - alias: src1 + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -383,29 +265,20 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col0) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + aggregations: sum(_col0) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -413,47 +286,28 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: double + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double + expressions: _col0 (type: string), _col2 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -469,9 +323,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- RS-JOIN-RS explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -480,36 +331,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan - alias: src + alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - src1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE TableScan - alias: src1 + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -517,18 +356,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -536,27 +371,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -572,9 +399,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- mGBY-RS-rGBY-mGBY-RS-rGBY explain from (select key, value from src group by key, value) s select s.key group by s.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) key))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -582,67 +406,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: complete outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -656,9 +459,6 @@ PREHOOK: query: explain select key, count(distinct value) from (select key, valu PREHOOK: type: QUERY POSTHOOK: query: explain select key, count(distinct value) from (select key, value from src group by key, value) t group by key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -666,73 +466,47 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT _col1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2722,9 +2496,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- RS-RS-GBY explain select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) Q1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2732,59 +2503,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - q1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col0) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: sum(_col0) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2798,9 +2550,6 @@ PREHOOK: query: explain select key, sum(key), lower(value) from (select * from s PREHOOK: type: QUERY POSTHOOK: query: explain select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) Q1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION lower (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_FUNCTION lower (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2808,67 +2557,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - q1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: lower(_col1) - type: string + key expressions: _col0 (type: string), lower(_col1) (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: lower(_col1) - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string), lower(_col1) (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col0) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: lower(_col1) - type: string + aggregations: sum(_col0) + keys: _col0 (type: string), lower(_col1) (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double - expr: _col1 - type: string + expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2882,9 +2604,6 @@ PREHOOK: query: explain select key, sum(key), (X + 1) from (select key, (value + PREHOOK: type: QUERY POSTHOOK: query: explain select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (+ (TOK_TABLE_OR_COL value) 1) X)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) Q1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (+ (TOK_TABLE_OR_COL X) 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (+ (TOK_TABLE_OR_COL X) 1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2892,67 +2611,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - q1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: (value + 1) - type: double + expressions: key (type: string), (value + 1) (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: (_col1 + 1) - type: double + key expressions: _col0 (type: string), (_col1 + 1) (type: double) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: (_col1 + 1) - type: double - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: double + Map-reduce partition columns: _col0 (type: string), (_col1 + 1) (type: double) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(_col0) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: (_col1 + 1) - type: double + aggregations: sum(_col0) + keys: _col0 (type: string), (_col1 + 1) (type: double) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double - expr: _col1 - type: double + expressions: _col0 (type: string), _col2 (type: double), _col1 (type: double) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2968,9 +2660,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- RS-GBY-RS explain select key, sum(key) as value from src group by key order by key, value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key)) value)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2978,46 +2667,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: _col1 - type: double + key expressions: key (type: string), _col1 (type: double) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - tag: -1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(KEY._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(KEY._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3033,9 +2709,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- RS-JOIN-RS-GBY explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (. (TOK_TABLE_OR_COL src) key)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3044,36 +2717,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan - alias: src + alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - src1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE TableScan - alias: src1 + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -3081,18 +2742,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3100,44 +2757,27 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(KEY._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: sum(KEY._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: double + expressions: _col0 (type: string), _col2 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3153,9 +2793,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- RS-JOIN-RS explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -3164,36 +2801,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan - alias: src + alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - src1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE TableScan - alias: src1 + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -3201,18 +2826,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3220,27 +2841,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3256,9 +2869,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- RS-GBY-RS-GBY explain from (select key, value from src group by key, value) s select s.key group by s.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) key))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -3266,58 +2876,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - tag: -1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: complete outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3331,9 +2924,6 @@ PREHOOK: query: explain select key, count(distinct value) from (select key, valu PREHOOK: type: QUERY POSTHOOK: query: explain select key, count(distinct value) from (select key, value from src group by key, value) t group by key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -3341,64 +2931,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - tag: -1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT _col1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/regex_col.q.out ql/src/test/results/clientpositive/regex_col.q.out index bb5105e..33615ac 100644 --- ql/src/test/results/clientpositive/regex_col.q.out +++ ql/src/test/results/clientpositive/regex_col.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM srcpart POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -17,17 +14,11 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: EXPLAIN @@ -36,9 +27,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT `..` FROM srcpart POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL `..`))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -46,20 +34,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: ds - type: string - expr: hr - type: string + expressions: ds (type: string), hr (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -75,9 +60,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT srcpart.`..` FROM srcpart POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) `..`))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -85,20 +67,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: ds - type: string - expr: hr - type: string + expressions: ds (type: string), hr (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -116,9 +95,6 @@ POSTHOOK: query: EXPLAIN SELECT `..` FROM srcpart a JOIN srcpart b ON a.key = b.key AND a.value = b.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL `..`))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -126,49 +102,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: 0 - value expressions: - expr: ds - type: string - expr: hr - type: string - b + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + value expressions: ds (type: string), hr (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: 1 - value expressions: - expr: ds - type: string - expr: hr - type: string + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + value expressions: ds (type: string), hr (type: string) Reduce Operator Tree: Join Operator condition map: @@ -176,22 +128,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col2} {VALUE._col3} 1 {VALUE._col2} {VALUE._col3} - handleSkewJoin: false outputColumnNames: _col2, _col3, _col8, _col9 + Statistics: Num rows: 127 Data size: 25572 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col2 (type: string), _col3 (type: string), _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 127 Data size: 25572 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 127 Data size: 25572 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -211,9 +156,6 @@ SELECT b.`..` FROM srcpart a JOIN srcpart b ON a.key = b.key AND a.hr = b.hr AND a.ds = b.ds AND a.key = 103 ORDER BY ds, hr POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (AND (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) hr) (. (TOK_TABLE_OR_COL b) hr))) (= (. (TOK_TABLE_OR_COL a) ds) (. (TOK_TABLE_OR_COL b) ds))) (= (. (TOK_TABLE_OR_COL a) key) 103)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) `..`))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL hr))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -222,60 +164,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 103) - type: boolean + predicate: (key = 103) (type: boolean) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: hr - type: string - expr: ds - type: string + key expressions: key (type: string), hr (type: string), ds (type: string) sort order: +++ - Map-reduce partition columns: - expr: key - type: string - expr: hr - type: string - expr: ds - type: string - tag: 0 - b + Map-reduce partition columns: key (type: string), hr (type: string), ds (type: string) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + value expressions: ds (type: string), hr (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 103) - type: boolean + predicate: (key = 103) (type: boolean) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: hr - type: string - expr: ds - type: string + key expressions: key (type: string), hr (type: string), ds (type: string) sort order: +++ - Map-reduce partition columns: - expr: key - type: string - expr: hr - type: string - expr: ds - type: string - tag: 1 - value expressions: - expr: ds - type: string - expr: hr - type: string + Map-reduce partition columns: key (type: string), hr (type: string), ds (type: string) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -283,18 +195,14 @@ STAGE PLANS: condition expressions: 0 1 {VALUE._col2} {VALUE._col3} - handleSkewJoin: false outputColumnNames: _col8, _col9 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -302,27 +210,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -374,9 +274,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT `.e.` FROM srcpart POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL `.e.`))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -384,18 +281,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 232 Data size: 23248 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -411,9 +307,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT `d.*` FROM srcpart POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL `d.*`))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -421,18 +314,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: ds - type: string + expressions: ds (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -448,9 +340,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT `(ds)?+.+` FROM srcpart POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL `(ds)?+.+`))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -458,22 +347,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -489,9 +373,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT `(ds|hr)?+.+` FROM srcpart ORDER BY key, value LIMIT 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL `(ds|hr)?+.+`))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -499,36 +380,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/regexp_extract.q.out ql/src/test/results/clientpositive/regexp_extract.q.out index 42802b5..60b6fda 100644 --- ql/src/test/results/clientpositive/regexp_extract.q.out +++ ql/src/test/results/clientpositive/regexp_extract.q.out @@ -17,7 +17,73 @@ FROM ( SELECT tmap.key, regexp_extract(tmap.value, 'val_(\\d+\\t\\d+)',1) WHERE tmap.key < 100 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value) (+ 1 2) (+ 3 4)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER))) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) key)) (TOK_SELEXPR (TOK_FUNCTION regexp_extract (. (TOK_TABLE_OR_COL tmap) value) 'val_(\\d+\\t\\d+)' 1))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL tmap) key) 100)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TRANSFORM + TOK_EXPLIST + . + TOK_TABLE_OR_COL + src + key + . + TOK_TABLE_OR_COL + src + value + + + 1 + 2 + + + 3 + 4 + TOK_SERDE + TOK_RECORDWRITER + 'cat' + TOK_SERDE + TOK_RECORDREADER + TOK_CLUSTERBY + TOK_TABLE_OR_COL + key + tmap + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + tmap + key + TOK_SELEXPR + TOK_FUNCTION + regexp_extract + . + TOK_TABLE_OR_COL + tmap + value + 'val_(\\d+\\t\\d+)' + 1 + TOK_WHERE + < + . + TOK_TABLE_OR_COL + tmap + key + 100 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -26,26 +92,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: (1 + 2) - type: int - expr: (3 + 4) - type: int + expressions: key (type: string), value (type: string), (1 + 2) (type: int), (3 + 4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: @@ -59,31 +114,18 @@ STAGE PLANS: serialization.last.column.takes.rest true serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: (_col0 < 100) - type: boolean - Statistics: - numRows: 9 dataSize: 1803 basicStatsState: COMPLETE colStatsState: NONE + predicate: (_col0 < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 9 dataSize: 1803 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + value expressions: _col0 (type: string), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -134,24 +176,17 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 9 dataSize: 1803 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: regexp_extract(_col1, 'val_(\d+\t\d+)', 1) - type: string + expressions: _col0 (type: string), regexp_extract(_col1, 'val_(\d+\t\d+)', 1) (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 9 dataSize: 1803 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 9 dataSize: 1803 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -295,7 +330,72 @@ FROM ( SELECT tmap.key, regexp_extract(tmap.value, 'val_(\\d+\\t\\d+)') WHERE tmap.key < 100 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value) (+ 1 2) (+ 3 4)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER))) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) key)) (TOK_SELEXPR (TOK_FUNCTION regexp_extract (. (TOK_TABLE_OR_COL tmap) value) 'val_(\\d+\\t\\d+)'))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL tmap) key) 100)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TRANSFORM + TOK_EXPLIST + . + TOK_TABLE_OR_COL + src + key + . + TOK_TABLE_OR_COL + src + value + + + 1 + 2 + + + 3 + 4 + TOK_SERDE + TOK_RECORDWRITER + 'cat' + TOK_SERDE + TOK_RECORDREADER + TOK_CLUSTERBY + TOK_TABLE_OR_COL + key + tmap + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + tmap + key + TOK_SELEXPR + TOK_FUNCTION + regexp_extract + . + TOK_TABLE_OR_COL + tmap + value + 'val_(\\d+\\t\\d+)' + TOK_WHERE + < + . + TOK_TABLE_OR_COL + tmap + key + 100 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -304,26 +404,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: (1 + 2) - type: int - expr: (3 + 4) - type: int + expressions: key (type: string), value (type: string), (1 + 2) (type: int), (3 + 4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: @@ -337,31 +426,18 @@ STAGE PLANS: serialization.last.column.takes.rest true serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: (_col0 < 100) - type: boolean - Statistics: - numRows: 9 dataSize: 1803 basicStatsState: COMPLETE colStatsState: NONE + predicate: (_col0 < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 9 dataSize: 1803 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + value expressions: _col0 (type: string), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -412,24 +488,17 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 9 dataSize: 1803 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: regexp_extract(_col1, 'val_(\d+\t\d+)') - type: string + expressions: _col0 (type: string), regexp_extract(_col1, 'val_(\d+\t\d+)') (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 9 dataSize: 1803 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 9 dataSize: 1803 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/router_join_ppr.q.out ql/src/test/results/clientpositive/router_join_ppr.q.out index 1b9124f..8deb390 100644 --- ql/src/test/results/clientpositive/router_join_ppr.q.out +++ ql/src/test/results/clientpositive/router_join_ppr.q.out @@ -17,7 +17,88 @@ POSTHOOK: query: EXPLAIN EXTENDED WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL b) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 10) (< (. (TOK_TABLE_OR_COL a) key) 20)) (> (. (TOK_TABLE_OR_COL b) key) 15)) (< (. (TOK_TABLE_OR_COL b) key) 25))))) + +TOK_QUERY + TOK_FROM + TOK_RIGHTOUTERJOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_TABREF + TOK_TABNAME + srcpart + b + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + b + ds + '2008-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -26,67 +107,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + alias: b + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key > 15) and (key < 25)) - type: boolean - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string), value (type: string), ds (type: string) TableScan - alias: b - Statistics: - numRows: 116 dataSize: 23248 basicStatsState: COMPLETE colStatsState: NONE + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key > 15) and (key < 25)) - type: boolean - Statistics: - numRows: 12 dataSize: 2404 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 12 dataSize: 2404 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string), value (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -319,37 +370,22 @@ STAGE PLANS: filter predicates: 0 1 {(VALUE._col2 = '2008-04-08')} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 - Statistics: - numRows: 13 dataSize: 2644 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 13 Data size: 2644 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean - Statistics: - numRows: 1 dataSize: 203 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 1 Data size: 203 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 1 dataSize: 203 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 203 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 203 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 203 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -431,7 +467,88 @@ POSTHOOK: query: EXPLAIN EXTENDED WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME srcpart) a) (TOK_TABREF (TOK_TABNAME src) b) (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 10) (< (. (TOK_TABLE_OR_COL a) key) 20)) (> (. (TOK_TABLE_OR_COL b) key) 15)) (< (. (TOK_TABLE_OR_COL b) key) 25))))) + +TOK_QUERY + TOK_FROM + TOK_RIGHTOUTERJOIN + TOK_TABREF + TOK_TABNAME + srcpart + a + TOK_TABREF + TOK_TABNAME + src + b + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + ds + '2008-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -440,65 +557,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key > 15) and (key < 25)) - type: boolean - Statistics: - numRows: 6 dataSize: 1202 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 6 dataSize: 1202 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string), value (type: string) TableScan - alias: b - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + alias: a + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key > 15) and (key < 25)) - type: boolean - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string), value (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -640,37 +729,22 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: - numRows: 6 dataSize: 1322 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 1322 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col6 - type: string - expr: _col7 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col7 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -748,7 +822,88 @@ POSTHOOK: query: EXPLAIN EXTENDED WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (AND (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 10) (< (. (TOK_TABLE_OR_COL a) key) 20)) (> (. (TOK_TABLE_OR_COL b) key) 15)) (< (. (TOK_TABLE_OR_COL b) key) 25)) (= (. (TOK_TABLE_OR_COL b) ds) '2008-04-08'))))) + +TOK_QUERY + TOK_FROM + TOK_RIGHTOUTERJOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_TABREF + TOK_TABNAME + srcpart + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + = + . + TOK_TABLE_OR_COL + b + ds + '2008-04-08' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -757,65 +912,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + alias: b + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key > 15) and (key < 25)) - type: boolean - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 6 Data size: 1202 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string), value (type: string) TableScan - alias: b - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key > 15) and (key < 25)) - type: boolean - Statistics: - numRows: 6 dataSize: 1202 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 6 dataSize: 1202 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string), value (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -957,37 +1084,22 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 - Statistics: - numRows: 6 dataSize: 1322 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 6 Data size: 1322 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: ((_col0 > 10) and (_col0 < 20)) - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1065,7 +1177,88 @@ POSTHOOK: query: EXPLAIN EXTENDED WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND a.ds = '2008-04-08' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME srcpart) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (AND (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 10) (< (. (TOK_TABLE_OR_COL a) key) 20)) (> (. (TOK_TABLE_OR_COL b) key) 15)) (< (. (TOK_TABLE_OR_COL b) key) 25)) (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08'))))) + +TOK_QUERY + TOK_FROM + TOK_RIGHTOUTERJOIN + TOK_TABREF + TOK_TABNAME + srcpart + a + TOK_TABREF + TOK_TABNAME + src + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + = + . + TOK_TABLE_OR_COL + a + ds + '2008-04-08' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1074,67 +1267,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a - Statistics: - numRows: 116 dataSize: 23248 basicStatsState: COMPLETE colStatsState: NONE + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key > 15) and (key < 25)) - type: boolean - Statistics: - numRows: 12 dataSize: 2404 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 12 dataSize: 2404 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string), value (type: string) TableScan - alias: b - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + alias: a + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((key > 15) and (key < 25)) - type: boolean - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 3 dataSize: 601 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 12 Data size: 2404 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string), value (type: string), ds (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1362,37 +1525,22 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col6, _col7 - Statistics: - numRows: 13 dataSize: 2644 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 13 Data size: 2644 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: (((_col0 > 10) and (_col0 < 20)) and (_col2 = '2008-04-08')) - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + predicate: (((_col0 > 10) and (_col0 < 20)) and (_col2 = '2008-04-08')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col6 - type: string - expr: _col7 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col7 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/sample1.q.out ql/src/test/results/clientpositive/sample1.q.out index 105a42a..eb7a552 100644 --- ql/src/test/results/clientpositive/sample1.q.out +++ ql/src/test/results/clientpositive/sample1.q.out @@ -16,7 +16,43 @@ FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1 ON rand()) s WHERE s.ds='2008-04-08' and s.hr='11' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) (TOK_TABLEBUCKETSAMPLE 1 1 (TOK_FUNCTION rand)) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL s) ds) '2008-04-08') (= (. (TOK_TABLE_OR_COL s) hr) '11'))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + TOK_TABLEBUCKETSAMPLE + 1 + 1 + TOK_FUNCTION + rand + s + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest1 + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + s + TOK_WHERE + and + = + . + TOK_TABLE_OR_COL + s + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + s + hr + '11' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -31,40 +67,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: - expr: (((hash(rand()) & 2147483647) % 1) = 0) - type: boolean - Statistics: - numRows: 14 dataSize: 2805 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((hash(rand()) & 2147483647) % 1) = 0) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: UDFToInteger(key) (type: int), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 14 dataSize: 2805 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 14 dataSize: 2805 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -168,8 +189,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -235,8 +255,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/sample10.q.out ql/src/test/results/clientpositive/sample10.q.out index c7be38d..beca660 100644 --- ql/src/test/results/clientpositive/sample10.q.out +++ ql/src/test/results/clientpositive/sample10.q.out @@ -49,7 +49,42 @@ POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(s POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpartbucket) (TOK_TABLEBUCKETSAMPLE 1 4 (TOK_TABLE_OR_COL key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL ds))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpartbucket + TOK_TABLEBUCKETSAMPLE + 1 + 4 + TOK_TABLE_OR_COL + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_WHERE + TOK_FUNCTION + TOK_ISNOTNULL + TOK_TABLE_OR_COL + ds + TOK_GROUPBY + TOK_TABLE_OR_COL + ds + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -59,52 +94,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpartbucket + Map Operator Tree: TableScan alias: srcpartbucket - Statistics: - numRows: 40 dataSize: 240 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 40 Data size: 240 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: - expr: (((hash(key) & 2147483647) % 4) = 0) - type: boolean - Statistics: - numRows: 20 dataSize: 120 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((hash(key) & 2147483647) % 4) = 0) (type: boolean) + Statistics: Num rows: 20 Data size: 120 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: ds - type: string + expressions: ds (type: string) outputColumnNames: ds - Statistics: - numRows: 20 dataSize: 120 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 20 Data size: 120 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: ds - type: string + aggregations: count(1) + keys: ds (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: - numRows: 20 dataSize: 120 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 20 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 20 dataSize: 120 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 120 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col1 - type: bigint + value expressions: _col1 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -292,25 +307,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 10 dataSize: 60 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 10 dataSize: 60 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -331,23 +336,15 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Statistics: - numRows: 10 dataSize: 60 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + value expressions: _col0 (type: string), _col1 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -376,15 +373,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 10 dataSize: 60 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 10 dataSize: 60 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/sample2.q.out ql/src/test/results/clientpositive/sample2.q.out index 8c53507..6282873 100644 --- ql/src/test/results/clientpositive/sample2.q.out +++ ql/src/test/results/clientpositive/sample2.q.out @@ -16,7 +16,27 @@ INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 2) s POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcbucket) (TOK_TABLEBUCKETSAMPLE 1 2) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s)))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket + TOK_TABLEBUCKETSAMPLE + 1 + 2 + s + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest1 + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + s + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -31,36 +51,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s - Statistics: - numRows: 111 dataSize: 11603 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 111 Data size: 11603 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: - expr: (((hash(key) & 2147483647) % 2) = 0) - type: boolean - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -166,8 +175,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -233,8 +241,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/sample3.q.out ql/src/test/results/clientpositive/sample3.q.out index 28b6ee6..da4175a 100644 --- ql/src/test/results/clientpositive/sample3.q.out +++ ql/src/test/results/clientpositive/sample3.q.out @@ -8,9 +8,6 @@ EXPLAIN SELECT s.key FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 5 on key) s POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcbucket) (TOK_TABLEBUCKETSAMPLE 1 5 (TOK_TABLE_OR_COL key)) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -18,22 +15,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s + Statistics: Num rows: 2900 Data size: 11603 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((hash(key) & 2147483647) % 5) = 0) - type: boolean + predicate: (((hash(key) & 2147483647) % 5) = 0) (type: boolean) + Statistics: Num rows: 1450 Data size: 5801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1450 Data size: 5801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1450 Data size: 5801 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/sample4.q.out ql/src/test/results/clientpositive/sample4.q.out index a87ece9..859e0c4 100644 --- ql/src/test/results/clientpositive/sample4.q.out +++ ql/src/test/results/clientpositive/sample4.q.out @@ -16,7 +16,29 @@ INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 2 on key) s POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcbucket) (TOK_TABLEBUCKETSAMPLE 1 2 (TOK_TABLE_OR_COL key)) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s)))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket + TOK_TABLEBUCKETSAMPLE + 1 + 2 + TOK_TABLE_OR_COL + key + s + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest1 + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + s + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -31,36 +53,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s - Statistics: - numRows: 111 dataSize: 11603 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 111 Data size: 11603 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: - expr: (((hash(key) & 2147483647) % 2) = 0) - type: boolean - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -166,8 +177,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -233,8 +243,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/sample5.q.out ql/src/test/results/clientpositive/sample5.q.out index 42ab6fc..60855f4 100644 --- ql/src/test/results/clientpositive/sample5.q.out +++ ql/src/test/results/clientpositive/sample5.q.out @@ -14,7 +14,29 @@ INSERT OVERWRITE TABLE dest1 SELECT s.* -- here's another test FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 5 on key) s POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcbucket) (TOK_TABLEBUCKETSAMPLE 1 5 (TOK_TABLE_OR_COL key)) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s)))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket + TOK_TABLEBUCKETSAMPLE + 1 + 5 + TOK_TABLE_OR_COL + key + s + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest1 + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + s + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -29,36 +51,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s - Statistics: - numRows: 111 dataSize: 11603 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 111 Data size: 11603 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: - expr: (((hash(key) & 2147483647) % 5) = 0) - type: boolean - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((hash(key) & 2147483647) % 5) = 0) (type: boolean) + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -164,8 +175,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -231,8 +241,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/sample6.q.out ql/src/test/results/clientpositive/sample6.q.out index 1ca5bf3..32a1141 100644 --- ql/src/test/results/clientpositive/sample6.q.out +++ ql/src/test/results/clientpositive/sample6.q.out @@ -14,7 +14,29 @@ INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 4 on key) s POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcbucket) (TOK_TABLEBUCKETSAMPLE 1 4 (TOK_TABLE_OR_COL key)) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s)))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket + TOK_TABLEBUCKETSAMPLE + 1 + 4 + TOK_TABLE_OR_COL + key + s + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest1 + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + s + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -29,36 +51,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s - Statistics: - numRows: 111 dataSize: 11603 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 111 Data size: 11603 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: - expr: (((hash(key) & 2147483647) % 4) = 0) - type: boolean - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((hash(key) & 2147483647) % 4) = 0) (type: boolean) + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -164,8 +175,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -231,8 +241,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -582,7 +591,35 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.key SIMPLE [(srcbucket)s.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: dest1.value SIMPLE [(srcbucket)s.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcbucket) (TOK_TABLEBUCKETSAMPLE 4 4 (TOK_TABLE_OR_COL key)) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket + TOK_TABLEBUCKETSAMPLE + 4 + 4 + TOK_TABLE_OR_COL + key + s + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + s + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -591,44 +628,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s - Statistics: - numRows: 111 dataSize: 11603 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 111 Data size: 11603 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: - expr: (((hash(key) & 2147483647) % 4) = 3) - type: boolean - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((hash(key) & 2147483647) % 4) = 3) (type: boolean) + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + value expressions: _col0 (type: int), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -681,15 +699,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -970,7 +986,35 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.key SIMPLE [(srcbucket)s.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: dest1.value SIMPLE [(srcbucket)s.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcbucket) (TOK_TABLEBUCKETSAMPLE 1 2 (TOK_TABLE_OR_COL key)) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket + TOK_TABLEBUCKETSAMPLE + 1 + 2 + TOK_TABLE_OR_COL + key + s + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + s + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -979,44 +1023,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s - Statistics: - numRows: 111 dataSize: 11603 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 111 Data size: 11603 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: - expr: (((hash(key) & 2147483647) % 2) = 0) - type: boolean - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + value expressions: _col0 (type: int), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1069,15 +1094,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1612,7 +1635,35 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.key SIMPLE [(srcbucket)s.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: dest1.value SIMPLE [(srcbucket)s.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcbucket) (TOK_TABLEBUCKETSAMPLE 1 3 (TOK_TABLE_OR_COL key)) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket + TOK_TABLEBUCKETSAMPLE + 1 + 3 + TOK_TABLE_OR_COL + key + s + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + s + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1621,44 +1672,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s - Statistics: - numRows: 111 dataSize: 11603 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 111 Data size: 11603 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: - expr: (((hash(key) & 2147483647) % 3) = 0) - type: boolean - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((hash(key) & 2147483647) % 3) = 0) (type: boolean) + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + value expressions: _col0 (type: int), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1711,15 +1743,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2097,7 +2127,35 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.key SIMPLE [(srcbucket)s.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: dest1.value SIMPLE [(srcbucket)s.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcbucket) (TOK_TABLEBUCKETSAMPLE 2 3 (TOK_TABLE_OR_COL key)) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket + TOK_TABLEBUCKETSAMPLE + 2 + 3 + TOK_TABLE_OR_COL + key + s + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + s + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2106,44 +2164,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s - Statistics: - numRows: 111 dataSize: 11603 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 111 Data size: 11603 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: - expr: (((hash(key) & 2147483647) % 3) = 1) - type: boolean - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((hash(key) & 2147483647) % 3) = 1) (type: boolean) + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + value expressions: _col0 (type: int), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2196,15 +2235,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2568,7 +2605,35 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.key SIMPLE [(srcbucket)s.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: dest1.value SIMPLE [(srcbucket)s.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcbucket2) (TOK_TABLEBUCKETSAMPLE 1 2 (TOK_TABLE_OR_COL key)) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket2 + TOK_TABLEBUCKETSAMPLE + 1 + 2 + TOK_TABLE_OR_COL + key + s + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + s + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2577,44 +2642,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s - Statistics: - numRows: 55 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: - expr: (((hash(key) & 2147483647) % 2) = 0) - type: boolean - Statistics: - numRows: 27 dataSize: 2853 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 27 dataSize: 2853 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: - numRows: 27 dataSize: 2853 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + value expressions: _col0 (type: int), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2712,15 +2758,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 27 dataSize: 2853 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 27 dataSize: 2853 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2884,7 +2928,35 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.key SIMPLE [(srcbucket)s.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: dest1.value SIMPLE [(srcbucket)s.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcbucket2) (TOK_TABLEBUCKETSAMPLE 2 4 (TOK_TABLE_OR_COL key)) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket2 + TOK_TABLEBUCKETSAMPLE + 2 + 4 + TOK_TABLE_OR_COL + key + s + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + s + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2893,44 +2965,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s - Statistics: - numRows: 55 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: - expr: (((hash(key) & 2147483647) % 4) = 1) - type: boolean - Statistics: - numRows: 27 dataSize: 2853 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((hash(key) & 2147483647) % 4) = 1) (type: boolean) + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 27 dataSize: 2853 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: - numRows: 27 dataSize: 2853 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + value expressions: _col0 (type: int), _col1 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2983,15 +3036,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 27 dataSize: 2853 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 27 dataSize: 2853 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3078,7 +3129,35 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.key SIMPLE [(srcbucket)s.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: dest1.value SIMPLE [(srcbucket)s.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME empty_bucket) (TOK_TABLEBUCKETSAMPLE 1 2 (TOK_TABLE_OR_COL key)) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + empty_bucket + TOK_TABLEBUCKETSAMPLE + 1 + 2 + TOK_TABLE_OR_COL + key + s + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + s + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -3087,56 +3166,16 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s - TableScan - alias: s - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - GatherStats: false - Filter Operator - isSamplingPred: true - predicate: - expr: (((hash(key) & 2147483647) % 2) = 0) - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string - sort order: ++ - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/sample7.q.out ql/src/test/results/clientpositive/sample7.q.out index acdf5b9..cf018f5 100644 --- ql/src/test/results/clientpositive/sample7.q.out +++ ql/src/test/results/clientpositive/sample7.q.out @@ -16,7 +16,36 @@ FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 4 on key) s WHERE s.key > 100 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcbucket) (TOK_TABLEBUCKETSAMPLE 1 4 (TOK_TABLE_OR_COL key)) s)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s)))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL s) key) 100)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket + TOK_TABLEBUCKETSAMPLE + 1 + 4 + TOK_TABLE_OR_COL + key + s + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest1 + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + s + TOK_WHERE + > + . + TOK_TABLE_OR_COL + s + key + 100 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -31,36 +60,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan alias: s - Statistics: - numRows: 111 dataSize: 11603 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 111 Data size: 11603 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((((hash(key) & 2147483647) % 4) = 0) and (key > 100)) - type: boolean - Statistics: - numRows: 18 dataSize: 1881 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((((hash(key) & 2147483647) % 4) = 0) and (key > 100)) (type: boolean) + Statistics: Num rows: 18 Data size: 1881 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 18 dataSize: 1881 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 18 Data size: 1881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 18 dataSize: 1881 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 18 Data size: 1881 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -166,8 +184,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -233,8 +250,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/sample8.q.out ql/src/test/results/clientpositive/sample8.q.out index ae1f56a..db65ce3 100644 --- ql/src/test/results/clientpositive/sample8.q.out +++ ql/src/test/results/clientpositive/sample8.q.out @@ -17,7 +17,98 @@ DISTRIBUTE BY key, value SORT BY key, value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) (TOK_TABLEBUCKETSAMPLE 1 1 (TOK_TABLE_OR_COL key)) s) (TOK_TABREF (TOK_TABNAME srcpart) (TOK_TABLEBUCKETSAMPLE 1 10 (TOK_TABLE_OR_COL key)) t))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s)))) (TOK_WHERE (and (and (and (and (and (= (. (TOK_TABLE_OR_COL t) key) (. (TOK_TABLE_OR_COL s) key)) (= (. (TOK_TABLE_OR_COL t) value) (. (TOK_TABLE_OR_COL s) value))) (= (. (TOK_TABLE_OR_COL s) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL s) hr) '11')) (= (. (TOK_TABLE_OR_COL s) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL s) hr) '11'))) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcpart + TOK_TABLEBUCKETSAMPLE + 1 + 1 + TOK_TABLE_OR_COL + key + s + TOK_TABREF + TOK_TABNAME + srcpart + TOK_TABLEBUCKETSAMPLE + 1 + 10 + TOK_TABLE_OR_COL + key + t + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + s + TOK_WHERE + and + and + and + and + and + = + . + TOK_TABLE_OR_COL + t + key + . + TOK_TABLE_OR_COL + s + key + = + . + TOK_TABLE_OR_COL + t + value + . + TOK_TABLE_OR_COL + s + value + = + . + TOK_TABLE_OR_COL + s + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + s + hr + '11' + = + . + TOK_TABLE_OR_COL + s + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + s + hr + '11' + TOK_DISTRIBUTEBY + TOK_TABLE_OR_COL + key + TOK_TABLE_OR_COL + value + TOK_SORTBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -27,77 +118,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s + Map Operator Tree: TableScan - alias: s - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + alias: t + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: - expr: ((((hash(key) & 2147483647) % 10) = 0) and (((hash(key) & 2147483647) % 1) = 0)) - type: boolean - Statistics: - numRows: 7 dataSize: 1402 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((((hash(key) & 2147483647) % 1) = 0) and (((hash(key) & 2147483647) % 10) = 0)) (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - Statistics: - numRows: 7 dataSize: 1402 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string - t + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string), value (type: string) TableScan - alias: t - Statistics: - numRows: 116 dataSize: 23248 basicStatsState: COMPLETE colStatsState: NONE + alias: s + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: - expr: ((((hash(key) & 2147483647) % 1) = 0) and (((hash(key) & 2147483647) % 10) = 0)) - type: boolean - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((((hash(key) & 2147483647) % 10) = 0) and (((hash(key) & 2147483647) % 1) = 0)) (type: boolean) + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 7 Data size: 1402 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -282,30 +333,16 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col6, _col7 - Statistics: - numRows: 31 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: ((((((_col6 = _col0) and (_col7 = _col1)) and (_col2 = '2008-04-08')) and (_col3 = '11')) and (_col2 = '2008-04-08')) and (_col3 = '11')) - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + predicate: ((((((_col6 = _col0) and (_col7 = _col1)) and (_col2 = '2008-04-08')) and (_col3 = '11')) and (_col2 = '2008-04-08')) and (_col3 = '11')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -326,34 +363,16 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -382,15 +401,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/sample9.q.out ql/src/test/results/clientpositive/sample9.q.out index 17d009f..9e1fb12 100644 --- ql/src/test/results/clientpositive/sample9.q.out +++ ql/src/test/results/clientpositive/sample9.q.out @@ -7,7 +7,41 @@ SELECT s.* FROM (SELECT a.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 2 on key) a) s POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcbucket) (TOK_TABLEBUCKETSAMPLE 1 2 (TOK_TABLE_OR_COL key)) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s)))))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket + TOK_TABLEBUCKETSAMPLE + 1 + 2 + TOK_TABLE_OR_COL + key + a + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + a + s + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + s + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -16,36 +50,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - s:a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 111 dataSize: 11603 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 111 Data size: 11603 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: - expr: (((hash(key) & 2147483647) % 2) = 0) - type: boolean - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 55 dataSize: 5749 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5749 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/script_pipe.q.out ql/src/test/results/clientpositive/script_pipe.q.out index 2c91cc4..cec5689 100644 --- ql/src/test/results/clientpositive/script_pipe.q.out +++ ql/src/test/results/clientpositive/script_pipe.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- Tests exception in ScriptOperator.close() by passing to the operator a small amount of data EXPLAIN SELECT TRANSFORM(*) USING 'true' AS a, b, c FROM (SELECT * FROM src LIMIT 1) tmp POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 1))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST TOK_ALLCOLREF) TOK_SERDE TOK_RECORDWRITER 'true' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST a b c)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,45 +11,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmp:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Transform Operator command: true output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -68,9 +61,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- Tests exception in ScriptOperator.processOp() by passing extra data needed to fill pipe buffer EXPLAIN SELECT TRANSFORM(key, value, key, value, key, value, key, value, key, value, key, value) USING 'head -n 1' as a,b,c,d FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)) TOK_SERDE TOK_RECORDWRITER 'head -n 1' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST a b c d)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -78,46 +68,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: key - type: string - expr: value - type: string - expr: key - type: string - expr: value - type: string - expr: key - type: string - expr: value - type: string - expr: key - type: string - expr: value - type: string - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string), key (type: string), value (type: string), key (type: string), value (type: string), key (type: string), value (type: string), key (type: string), value (type: string), key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: head -n 1 output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/select_as_omitted.q.out ql/src/test/results/clientpositive/select_as_omitted.q.out index f58d9c5..6c0b3ba 100644 --- ql/src/test/results/clientpositive/select_as_omitted.q.out +++ ql/src/test/results/clientpositive/select_as_omitted.q.out @@ -12,9 +12,6 @@ SELECT a, b FROM ( ) src1 ORDER BY a LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) a) (TOK_SELEXPR (TOK_TABLE_OR_COL value) b)))) src1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL a))) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -22,34 +19,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/select_transform_hint.q.out ql/src/test/results/clientpositive/select_transform_hint.q.out index 3aafd10..0f39ba4 100644 --- ql/src/test/results/clientpositive/select_transform_hint.q.out +++ ql/src/test/results/clientpositive/select_transform_hint.q.out @@ -10,9 +10,6 @@ TRANSFORM(a.key, a.value) USING 'cat' AS (tkey, tvalue) FROM src a join src b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL a) value)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST tkey tvalue)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -20,38 +17,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1 - Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -66,6 +60,7 @@ STAGE PLANS: a TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1125,9 +1120,6 @@ TRANSFORM(a.key, a.value) USING 'cat' AS (tkey, tvalue) FROM src a join src b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_STREAMTABLE (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL a) value)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST tkey tvalue)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1135,36 +1127,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1172,24 +1152,22 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/select_unquote_and.q.out ql/src/test/results/clientpositive/select_unquote_and.q.out index 258a4ba..bb02285 100644 --- ql/src/test/results/clientpositive/select_unquote_and.q.out +++ ql/src/test/results/clientpositive/select_unquote_and.q.out @@ -54,9 +54,6 @@ POSTHOOK: Lineage: npe_test PARTITION(ds=2012-12-11).key SIMPLE [(src)src.FieldS POSTHOOK: Lineage: npe_test PARTITION(ds=2012-12-11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: npe_test PARTITION(ds=2012-12-12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: npe_test PARTITION(ds=2012-12-12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME npe_test))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL ds) (- (- 2012 11) 31)) (< (TOK_TABLE_OR_COL ds) (- (- 2012 12) 15)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -64,26 +61,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - npe_test + Map Operator Tree: TableScan alias: npe_test + Statistics: Num rows: 498 Data size: 5290 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((ds > ((2012 - 11) - 31)) and (ds < ((2012 - 12) - 15))) - type: boolean + predicate: ((ds > ((2012 - 11) - 31)) and (ds < ((2012 - 12) - 15))) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string + expressions: key (type: string), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/select_unquote_not.q.out ql/src/test/results/clientpositive/select_unquote_not.q.out index cd6fd80..36f5c77 100644 --- ql/src/test/results/clientpositive/select_unquote_not.q.out +++ ql/src/test/results/clientpositive/select_unquote_not.q.out @@ -54,9 +54,6 @@ POSTHOOK: Lineage: npe_test PARTITION(ds=2012-12-11).key SIMPLE [(src)src.FieldS POSTHOOK: Lineage: npe_test PARTITION(ds=2012-12-11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: npe_test PARTITION(ds=2012-12-12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: npe_test PARTITION(ds=2012-12-12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME npe_test))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (NOT (< (TOK_TABLE_OR_COL ds) (- (- 2012 11) 31)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -64,26 +61,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - npe_test + Map Operator Tree: TableScan alias: npe_test + Statistics: Num rows: 498 Data size: 5290 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (not (ds < ((2012 - 11) - 31))) - type: boolean + predicate: (not (ds < ((2012 - 11) - 31))) (type: boolean) + Statistics: Num rows: 332 Data size: 3526 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string + expressions: key (type: string), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 332 Data size: 3526 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 332 Data size: 3526 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/select_unquote_or.q.out ql/src/test/results/clientpositive/select_unquote_or.q.out index e4e8953..5b68691 100644 --- ql/src/test/results/clientpositive/select_unquote_or.q.out +++ ql/src/test/results/clientpositive/select_unquote_or.q.out @@ -54,9 +54,6 @@ POSTHOOK: Lineage: npe_test PARTITION(ds=2012-12-11).key SIMPLE [(src)src.FieldS POSTHOOK: Lineage: npe_test PARTITION(ds=2012-12-11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: npe_test PARTITION(ds=2012-12-12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: npe_test PARTITION(ds=2012-12-12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME npe_test))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (OR (> (TOK_TABLE_OR_COL ds) (- (- 2012 11) 31)) (< (TOK_TABLE_OR_COL ds) (- (- 2012 12) 15)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -64,26 +61,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - npe_test + Map Operator Tree: TableScan alias: npe_test + Statistics: Num rows: 498 Data size: 5290 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((ds > ((2012 - 11) - 31)) or (ds < ((2012 - 12) - 15))) - type: boolean + predicate: ((ds > ((2012 - 11) - 31)) or (ds < ((2012 - 12) - 15))) (type: boolean) + Statistics: Num rows: 332 Data size: 3526 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string + expressions: key (type: string), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 332 Data size: 3526 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 332 Data size: 3526 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/semijoin.q.out ql/src/test/results/clientpositive/semijoin.q.out index e2486a0..9fa5bb9 100644 --- ql/src/test/results/clientpositive/semijoin.q.out +++ ql/src/test/results/clientpositive/semijoin.q.out @@ -106,9 +106,6 @@ PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key so PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_TABREF (TOK_TABNAME t2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -117,48 +114,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - expr: value - type: string - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -166,18 +148,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -185,27 +163,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -235,9 +205,6 @@ PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key so PREHOOK: type: QUERY POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t2) a) (TOK_TABREF (TOK_TABNAME t1) b) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL a) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -246,48 +213,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - expr: value - type: string - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -295,18 +247,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -314,27 +262,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -366,9 +306,6 @@ PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key so PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_TABREF (TOK_TABNAME t4) b) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL a) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -377,48 +314,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - expr: value - type: string - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -426,18 +348,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -445,27 +363,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -489,9 +399,6 @@ PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = PREHOOK: type: QUERY POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_TABREF (TOK_TABNAME t3) b) (and (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL a) key)) (< (. (TOK_TABLE_OR_COL b) key) '15')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -500,52 +407,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: value - type: string - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 15) - type: boolean + predicate: (key < 15) (type: boolean) + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int - expr: key - type: int + keys: key (type: int), key (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: int + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: - expr: _col1 - type: int - tag: 1 + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -553,16 +444,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col1 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -570,23 +459,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -621,9 +506,6 @@ PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_TABREF (TOK_TABNAME t2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (< (. (TOK_TABLE_OR_COL b) value) "val_10")))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -632,56 +514,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - expr: value - type: string - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (value < 'val_10') - type: boolean + predicate: (value < 'val_10') (type: boolean) + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int - expr: value - type: string + keys: key (type: int), value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -689,18 +551,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -708,27 +566,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -755,9 +605,6 @@ PREHOOK: query: explain select a.value from t1 a left semi join (select key from PREHOOK: type: QUERY POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (> (TOK_TABLE_OR_COL key) 5)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -766,50 +613,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: value - type: string - b:t3 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan alias: t3 + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 5) - type: boolean + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: int + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -817,16 +650,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col1 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -834,23 +665,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -877,9 +704,6 @@ PREHOOK: query: explain select a.value from t1 a left semi join (select key , va PREHOOK: type: QUERY POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (> (TOK_TABLE_OR_COL key) 5)))) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (<= (. (TOK_TABLE_OR_COL b) value) 'val_20')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -888,54 +712,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: value - type: string - b:t2 + Map Operator Tree: TableScan alias: t2 + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > 5) and (value <= 'val_20')) - type: boolean + predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: int - expr: _col1 - type: string + keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -943,16 +749,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col1 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -960,23 +764,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1000,9 +800,6 @@ PREHOOK: query: explain select * from t2 a left semi join (select key , value fr PREHOOK: type: QUERY POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t2) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (> (TOK_TABLE_OR_COL key) 2)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1011,52 +808,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - expr: value - type: string - b:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 2) - type: boolean + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: int + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1064,18 +845,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1083,27 +860,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1132,9 +901,6 @@ PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -1151,67 +917,58 @@ STAGE PLANS: b TableScan alias: b + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 key (type: int) + 1 _col0 (type: int) Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Semi Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[_col0]] + 0 key (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int + Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Local Work: Map Reduce Local Work Reduce Operator Tree: Extract + Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1254,9 +1011,6 @@ PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.ke PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_TABREF (TOK_TABNAME t2) b) (= (. (TOK_TABLE_OR_COL a) key) (* 2 (. (TOK_TABLE_OR_COL b) key))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1265,48 +1019,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - expr: value - type: string - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: (2 * _col0) - type: int + key expressions: (2 * _col0) (type: int) sort order: + - Map-reduce partition columns: - expr: (2 * _col0) - type: int - tag: 1 + Map-reduce partition columns: (2 * _col0) (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1314,18 +1053,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1333,27 +1068,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1381,9 +1108,6 @@ PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_TABREF (TOK_TABNAME t2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME t3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1392,65 +1116,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - expr: value - type: string - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - value expressions: - expr: key - type: int - expr: value - type: string - c + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) TableScan alias: c + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 2 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1460,22 +1161,14 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1483,31 +1176,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1547,9 +1228,6 @@ PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t1) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1558,60 +1236,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - expr: value - type: string - sort order: ++ - Map-reduce partition columns: - expr: key - type: int - expr: value - type: string - tag: 0 - value expressions: - expr: key - type: int - expr: value - type: string - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int - expr: value - type: string + keys: key (type: int), value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: int), value (type: string) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1619,18 +1270,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1638,27 +1285,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1696,9 +1335,6 @@ PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi jo PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME t2) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b c))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-4 is a root stage Stage-1 depends on stages: Stage-4 @@ -1718,62 +1354,54 @@ STAGE PLANS: b TableScan alias: b + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 {key} 1 2 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[_col0]] - 2 [Column[_col0]] - Position of Big Table: 0 + 0 key (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) c TableScan alias: c + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 {key} 1 2 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[_col0]] - 2 [Column[_col0]] - Position of Big Table: 0 + 0 key (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Semi Join 0 to 1 @@ -1782,34 +1410,29 @@ STAGE PLANS: 0 {key} 1 2 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[_col0]] - 2 [Column[_col0]] + 0 key (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) outputColumnNames: _col0 - Position of Big Table: 0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Local Work: Map Reduce Local Work Reduce Operator Tree: Extract + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1849,9 +1472,6 @@ PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME t2) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1860,58 +1480,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - c + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE TableScan alias: c + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 2 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Join Operator condition map: @@ -1921,16 +1524,14 @@ STAGE PLANS: 0 {VALUE._col0} 1 2 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1938,23 +1539,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2006,9 +1603,6 @@ PREHOOK: query: explain select a.key from t1 a right outer join t3 b on a.key = PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t1 a right outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_TABREF (TOK_TABNAME t3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME t2) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2017,58 +1611,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - c + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE TableScan alias: c + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 2 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Join Operator condition map: @@ -2078,16 +1655,14 @@ STAGE PLANS: 0 {VALUE._col0} 1 2 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2095,23 +1670,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2166,9 +1737,6 @@ PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_TABREF (TOK_TABNAME t3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME t2) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2177,58 +1745,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - c + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE TableScan alias: c + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 2 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Join Operator condition map: @@ -2238,16 +1789,14 @@ STAGE PLANS: 0 {VALUE._col0} 1 2 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2255,23 +1804,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2326,9 +1871,6 @@ PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b. PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME t1) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2337,58 +1879,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - c + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE TableScan alias: c + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 2 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Join Operator condition map: @@ -2398,16 +1923,14 @@ STAGE PLANS: 0 {VALUE._col0} 1 2 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2415,23 +1938,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2486,9 +2005,6 @@ PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b. PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME t1) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2497,58 +2013,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - c + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE TableScan alias: c + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 2 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Join Operator condition map: @@ -2558,16 +2057,14 @@ STAGE PLANS: 0 {VALUE._col0} 1 2 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2575,23 +2072,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2648,9 +2141,6 @@ PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b. PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME t2) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2659,58 +2149,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - c + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE TableScan alias: c + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 2 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Join Operator condition map: @@ -2720,16 +2193,14 @@ STAGE PLANS: 0 {VALUE._col0} 1 2 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2737,23 +2208,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2821,9 +2288,6 @@ PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b. PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME t1) c) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL c) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -2833,48 +2297,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - expr: value - type: string - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: key + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: int + keys: key (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -2882,11 +2331,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2894,33 +2342,22 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col0 - type: int - c + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 24 Data size: 179 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) TableScan alias: c + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: value (type: string) sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: 1 + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -2928,16 +2365,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 196 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2945,23 +2380,19 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int + Statistics: Num rows: 26 Data size: 196 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 26 Data size: 196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 26 Data size: 196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3022,9 +2453,6 @@ PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t2) b) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL a) key) 100)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -3032,50 +2460,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Filter Operator - predicate: - expr: (key > 100) - type: boolean - Reduce Output Operator - key expressions: - expr: value - type: string - sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: 0 - value expressions: - expr: key - type: int - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: value + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: value - type: string + keys: value (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 100) (type: boolean) + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Join Operator condition map: @@ -3083,16 +2497,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/serde_regex.q.out ql/src/test/results/clientpositive/serde_regex.q.out index 3893752..86e3cc6 100644 --- ql/src/test/results/clientpositive/serde_regex.q.out +++ ql/src/test/results/clientpositive/serde_regex.q.out @@ -32,9 +32,6 @@ WITH SERDEPROPERTIES ( ) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME serde_regex) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL host TOK_STRING) (TOK_TABCOL identity TOK_STRING) (TOK_TABCOL user TOK_STRING) (TOK_TABCOL time TOK_STRING) (TOK_TABCOL request TOK_STRING) (TOK_TABCOL status TOK_STRING) (TOK_TABCOL size TOK_INT) (TOK_TABCOL referer TOK_STRING) (TOK_TABCOL agent TOK_STRING)) (TOK_TABLESERIALIZER (TOK_SERDENAME 'org.apache.hadoop.hive.serde2.RegexSerDe' (TOK_TABLEPROPERTIES (TOK_TABLEPROPLIST (TOK_TABLEPROPERTY "input.regex" "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\"))?"))))) TOK_TBLTEXTFILE) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -43,15 +40,12 @@ STAGE PLANS: Create Table Operator: Create Table columns: host string, identity string, user string, time string, request string, status string, size int, referer string, agent string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat serde name: org.apache.hadoop.hive.serde2.RegexSerDe serde properties: input.regex ([^ ]*) ([^ ]*) ([^ ]*) (-|\[[^\]]*\]) ([^ "]*|"[^"]*") (-|[0-9]*) (-|[0-9]*)(?: ([^ "]*|"[^"]*") ([^ "]*|"[^"]*"))? name: serde_regex - isExternal: false PREHOOK: query: CREATE TABLE serde_regex( host STRING, @@ -146,9 +140,6 @@ WITH SERDEPROPERTIES ( ) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME serde_regex1) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL key (TOK_DECIMAL 38 18)) (TOK_TABCOL value TOK_INT)) (TOK_TABLESERIALIZER (TOK_SERDENAME 'org.apache.hadoop.hive.serde2.RegexSerDe' (TOK_TABLEPROPERTIES (TOK_TABLEPROPLIST (TOK_TABLEPROPERTY "input.regex" "([^ ]*) ([^ ]*)"))))) TOK_TBLTEXTFILE) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -157,15 +148,12 @@ STAGE PLANS: Create Table Operator: Create Table columns: key decimal(38,18), value int - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat serde name: org.apache.hadoop.hive.serde2.RegexSerDe serde properties: input.regex ([^ ]*) ([^ ]*) name: serde_regex1 - isExternal: false PREHOOK: query: CREATE TABLE serde_regex1( key decimal(38,18), diff --git ql/src/test/results/clientpositive/serde_user_properties.q.out ql/src/test/results/clientpositive/serde_user_properties.q.out index 1696a7d..e85cadb 100644 --- ql/src/test/results/clientpositive/serde_user_properties.q.out +++ ql/src/test/results/clientpositive/serde_user_properties.q.out @@ -7,7 +7,21 @@ POSTHOOK: query: -- HIVE-2906 Table properties in SQL explain extended select key from src POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -16,27 +30,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -109,7 +117,24 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select a.key from src a POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + a + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -118,27 +143,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -211,7 +230,27 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select a.key from src tablesample(1 percent) a POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_PERCENT 1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_TABLESPLITSAMPLE + TOK_PERCENT + 1 + a + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -220,27 +259,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -317,7 +350,26 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select key from src ('user.defined.key'='some.value') POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLEPROPERTIES (TOK_TABLEPROPLIST (TOK_TABLEPROPERTY 'user.defined.key' 'some.value'))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_TABLEPROPERTIES + TOK_TABLEPROPLIST + TOK_TABLEPROPERTY + 'user.defined.key' + 'some.value' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -326,27 +378,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -421,7 +467,29 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select key from src ('user.defined.key'='some.value') tablesample(1 percent) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLEPROPERTIES (TOK_TABLEPROPLIST (TOK_TABLEPROPERTY 'user.defined.key' 'some.value'))) (TOK_TABLESPLITSAMPLE TOK_PERCENT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_TABLEPROPERTIES + TOK_TABLEPROPLIST + TOK_TABLEPROPERTY + 'user.defined.key' + 'some.value' + TOK_TABLESPLITSAMPLE + TOK_PERCENT + 1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -430,27 +498,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -529,7 +591,29 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select a.key from src ('user.defined.key'='some.value') a POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLEPROPERTIES (TOK_TABLEPROPLIST (TOK_TABLEPROPERTY 'user.defined.key' 'some.value'))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_TABLEPROPERTIES + TOK_TABLEPROPLIST + TOK_TABLEPROPERTY + 'user.defined.key' + 'some.value' + a + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -538,27 +622,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -633,7 +711,32 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select a.key from src ('user.defined.key'='some.value') tablesample(1 percent) a POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLEPROPERTIES (TOK_TABLEPROPLIST (TOK_TABLEPROPERTY 'user.defined.key' 'some.value'))) (TOK_TABLESPLITSAMPLE TOK_PERCENT 1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_TABLEPROPERTIES + TOK_TABLEPROPLIST + TOK_TABLEPROPERTY + 'user.defined.key' + 'some.value' + TOK_TABLESPLITSAMPLE + TOK_PERCENT + 1 + a + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -642,27 +745,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/set_processor_namespaces.q.out ql/src/test/results/clientpositive/set_processor_namespaces.q.out index f0db61c..70a9066 100644 --- ql/src/test/results/clientpositive/set_processor_namespaces.q.out +++ ql/src/test/results/clientpositive/set_processor_namespaces.q.out @@ -7,9 +7,6 @@ PREHOOK: query: EXPLAIN SELECT * FROM src where key=5 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM src where key=5 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 5)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -17,24 +14,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 5) - type: boolean + predicate: (key = 5) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/set_variable_sub.q.out ql/src/test/results/clientpositive/set_variable_sub.q.out index b633fa8..f9237a5 100644 --- ql/src/test/results/clientpositive/set_variable_sub.q.out +++ ql/src/test/results/clientpositive/set_variable_sub.q.out @@ -2,9 +2,6 @@ PREHOOK: query: EXPLAIN SELECT * FROM src where key="value1" PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM src where key="value1" POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) "value1")))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -12,24 +9,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 'value1') - type: boolean + predicate: (key = 'value1') (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -43,9 +36,6 @@ PREHOOK: query: EXPLAIN SELECT * FROM src where key="value1" PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM src where key="value1" POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) "value1")))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -53,24 +43,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 'value1') - type: boolean + predicate: (key = 'value1') (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -84,9 +70,6 @@ PREHOOK: query: EXPLAIN SELECT * FROM src where key="1" PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM src where key="1" POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) "1")))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -94,24 +77,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = '1') - type: boolean + predicate: (key = '1') (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/show_columns.q.out ql/src/test/results/clientpositive/show_columns.q.out index 17f1ea4..5212cfc 100644 --- ql/src/test/results/clientpositive/show_columns.q.out +++ ql/src/test/results/clientpositive/show_columns.q.out @@ -9,9 +9,6 @@ PREHOOK: type: SHOWCOLUMNS POSTHOOK: query: EXPLAIN SHOW COLUMNS from shcol_test POSTHOOK: type: SHOWCOLUMNS -ABSTRACT SYNTAX TREE: - (TOK_SHOWCOLUMNS (TOK_TABNAME shcol_test)) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage diff --git ql/src/test/results/clientpositive/show_indexes_edge_cases.q.out ql/src/test/results/clientpositive/show_indexes_edge_cases.q.out index 9e6e4da..2f598b1 100644 --- ql/src/test/results/clientpositive/show_indexes_edge_cases.q.out +++ ql/src/test/results/clientpositive/show_indexes_edge_cases.q.out @@ -116,9 +116,6 @@ POSTHOOK: Lineage: default__show_idx_full_idx_compound__._bucketname SIMPLE [(sh POSTHOOK: Lineage: default__show_idx_full_idx_compound__._offsets EXPRESSION [(show_idx_full)show_idx_full.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__show_idx_full_idx_compound__.key SIMPLE [(show_idx_full)show_idx_full.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: default__show_idx_full_idx_compound__.value1 SIMPLE [(show_idx_full)show_idx_full.FieldSchema(name:value1, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_SHOWINDEXES show_idx_full) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage @@ -170,9 +167,6 @@ POSTHOOK: Lineage: default__show_idx_full_idx_compound__._bucketname SIMPLE [(sh POSTHOOK: Lineage: default__show_idx_full_idx_compound__._offsets EXPRESSION [(show_idx_full)show_idx_full.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__show_idx_full_idx_compound__.key SIMPLE [(show_idx_full)show_idx_full.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: default__show_idx_full_idx_compound__.value1 SIMPLE [(show_idx_full)show_idx_full.FieldSchema(name:value1, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_SHOWINDEXES show_idx_empty) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage diff --git ql/src/test/results/clientpositive/show_indexes_syntax.q.out ql/src/test/results/clientpositive/show_indexes_syntax.q.out index 98d757b..070ba57 100644 --- ql/src/test/results/clientpositive/show_indexes_syntax.q.out +++ ql/src/test/results/clientpositive/show_indexes_syntax.q.out @@ -32,9 +32,6 @@ POSTHOOK: type: SHOWINDEXES POSTHOOK: Lineage: default__show_idx_t1_idx_t1__._bucketname SIMPLE [(show_idx_t1)show_idx_t1.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__show_idx_t1_idx_t1__._offsets EXPRESSION [(show_idx_t1)show_idx_t1.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__show_idx_t1_idx_t1__.key SIMPLE [(show_idx_t1)show_idx_t1.FieldSchema(name:key, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_SHOWINDEXES show_idx_t1) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage @@ -65,9 +62,6 @@ POSTHOOK: type: SHOWINDEXES POSTHOOK: Lineage: default__show_idx_t1_idx_t1__._bucketname SIMPLE [(show_idx_t1)show_idx_t1.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__show_idx_t1_idx_t1__._offsets EXPRESSION [(show_idx_t1)show_idx_t1.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__show_idx_t1_idx_t1__.key SIMPLE [(show_idx_t1)show_idx_t1.FieldSchema(name:key, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_SHOWINDEXES show_idx_t1) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage @@ -98,9 +92,6 @@ POSTHOOK: type: SHOWINDEXES POSTHOOK: Lineage: default__show_idx_t1_idx_t1__._bucketname SIMPLE [(show_idx_t1)show_idx_t1.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__show_idx_t1_idx_t1__._offsets EXPRESSION [(show_idx_t1)show_idx_t1.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__show_idx_t1_idx_t1__.key SIMPLE [(show_idx_t1)show_idx_t1.FieldSchema(name:key, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_SHOWINDEXES show_idx_t1 FORMATTED) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage diff --git ql/src/test/results/clientpositive/show_tables.q.out ql/src/test/results/clientpositive/show_tables.q.out index e097882..fddb234 100644 --- ql/src/test/results/clientpositive/show_tables.q.out +++ ql/src/test/results/clientpositive/show_tables.q.out @@ -14,9 +14,6 @@ PREHOOK: type: SHOWTABLES POSTHOOK: query: EXPLAIN SHOW TABLES 'shtb_*' POSTHOOK: type: SHOWTABLES -ABSTRACT SYNTAX TREE: - (TOK_SHOWTABLES 'shtb_*') - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage @@ -44,9 +41,6 @@ PREHOOK: type: SHOWTABLES POSTHOOK: query: EXPLAIN SHOW TABLES LIKE 'shtb_test1|shtb_test2' POSTHOOK: type: SHOWTABLES -ABSTRACT SYNTAX TREE: - (TOK_SHOWTABLES 'shtb_test1|shtb_test2') - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage diff --git ql/src/test/results/clientpositive/show_tablestatus.q.out ql/src/test/results/clientpositive/show_tablestatus.q.out index 3d1ab2d..4d3b182 100644 --- ql/src/test/results/clientpositive/show_tablestatus.q.out +++ ql/src/test/results/clientpositive/show_tablestatus.q.out @@ -4,9 +4,6 @@ PREHOOK: type: SHOW_TABLESTATUS POSTHOOK: query: EXPLAIN SHOW TABLE EXTENDED IN default LIKE `src` POSTHOOK: type: SHOW_TABLESTATUS -ABSTRACT SYNTAX TREE: - (TOK_SHOW_TABLESTATUS `src` default) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage diff --git ql/src/test/results/clientpositive/showparts.q.out ql/src/test/results/clientpositive/showparts.q.out index 0d688aa..9f40fc0 100644 --- ql/src/test/results/clientpositive/showparts.q.out +++ ql/src/test/results/clientpositive/showparts.q.out @@ -4,9 +4,6 @@ PREHOOK: type: SHOWPARTITIONS POSTHOOK: query: EXPLAIN SHOW PARTITIONS srcpart POSTHOOK: type: SHOWPARTITIONS -ABSTRACT SYNTAX TREE: - (TOK_SHOWPARTITIONS (TOK_TABNAME srcpart)) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 is a root stage diff --git ql/src/test/results/clientpositive/skewjoin.q.out ql/src/test/results/clientpositive/skewjoin.q.out index b5b959f..214e7df 100644 --- ql/src/test/results/clientpositive/skewjoin.q.out +++ ql/src/test/results/clientpositive/skewjoin.q.out @@ -55,9 +55,6 @@ POSTHOOK: query: EXPLAIN FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) src1) (TOK_TABREF (TOK_TABNAME src) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-0 @@ -68,37 +65,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1 + Map Operator Tree: TableScan - alias: src1 + alias: src2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - src2 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: src2 + alias: src1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -108,16 +93,14 @@ STAGE PLANS: 1 {VALUE._col1} handleSkewJoin: true outputColumnNames: _col0, _col5 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col5 - type: string + expressions: UDFToInteger(_col0) (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -129,8 +112,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - 0 + Map Operator Tree: TableScan Map Join Operator condition map: @@ -138,22 +120,15 @@ STAGE PLANS: condition expressions: 0 {0_VALUE_0} 1 {1_VALUE_0} - handleSkewJoin: false keys: - 0 [Column[joinkey0]] - 1 [Column[joinkey0]] + 0 joinkey0 (type: string) + 1 joinkey0 (type: string) outputColumnNames: _col0, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col5 - type: string + expressions: UDFToInteger(_col0) (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -219,9 +194,6 @@ FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME T3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME T4) d) (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_STREAMTABLE (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -229,75 +201,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: d + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 3 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string - c + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: val - type: string - d + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: d + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -309,30 +249,15 @@ STAGE PLANS: 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} 3 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13 + Statistics: Num rows: 0 Data size: 99 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col12 - type: string - expr: _col13 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string), _col9 (type: string), _col12 (type: string), _col13 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 99 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 99 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -379,9 +304,6 @@ FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME T3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME T4) d) (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_STREAMTABLE (TOK_HINTARGLIST a c))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -389,75 +311,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: d + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 3 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string - c + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: val - type: string - d + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: d + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -469,30 +359,15 @@ STAGE PLANS: 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} 3 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13 + Statistics: Num rows: 0 Data size: 99 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col12 - type: string - expr: _col13 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string), _col9 (type: string), _col12 (type: string), _col13 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 99 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 99 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -531,9 +406,6 @@ POSTHOOK: query: EXPLAIN FROM T1 a JOIN src c ON c.key+1=a.key SELECT /*+ STREAM POSTHOOK: type: QUERY POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME src) c) (= (+ (. (TOK_TABLE_OR_COL c) key) 1) (. (TOK_TABLE_OR_COL a) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_STREAMTABLE (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL a) key)))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL a) val)))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL c) key))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -542,39 +414,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: c + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(key) - type: double + key expressions: (key + 1) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(key) - type: double - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string - c + Map-reduce partition columns: (key + 1) (type: double) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: c + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: (key + 1) - type: double + key expressions: UDFToDouble(key) (type: double) sort order: + - Map-reduce partition columns: - expr: (key + 1) - type: double - tag: 0 - value expressions: - expr: key - type: string + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -582,28 +440,19 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col1)) - expr: sum(hash(_col4)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col4)) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -611,40 +460,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - expr: sum(VALUE._col2) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -683,9 +517,6 @@ SELECT sum(hash(Y.key)), sum(hash(Y.value)) POSTHOOK: type: QUERY POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))))) x) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))))) Y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL Y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL Y) key)))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL Y) value))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-2 @@ -696,48 +527,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - y:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -747,23 +562,18 @@ STAGE PLANS: 1 {VALUE._col0} {VALUE._col1} handleSkewJoin: true outputColumnNames: _col2, _col3 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col2 (type: string), _col3 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -774,8 +584,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - 0 + Map Operator Tree: TableScan Map Join Operator condition map: @@ -783,29 +592,19 @@ STAGE PLANS: condition expressions: 0 1 {1_VALUE_0} {1_VALUE_1} - handleSkewJoin: false keys: - 0 [Column[joinkey0]] - 1 [Column[joinkey0]] + 0 joinkey0 (type: string) + 1 joinkey0 (type: string) outputColumnNames: _col2, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col2 (type: string), _col3 (type: string) outputColumnNames: _col2, _col3 Group By Operator - aggregations: - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -822,35 +621,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -897,9 +686,6 @@ SELECT sum(hash(Y.key)), sum(hash(Y.value)) POSTHOOK: type: QUERY POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))))) x) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))))) Y) (and (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL Y) key)) (= (TOK_FUNCTION substring (. (TOK_TABLE_OR_COL x) value) 5) (+ (TOK_FUNCTION substring (. (TOK_TABLE_OR_COL y) value) 5) 1))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL Y) key)))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL Y) value))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-2 @@ -910,58 +696,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: UDFToDouble(substring(_col1, 5)) - type: double + key expressions: _col0 (type: string), UDFToDouble(substring(_col1, 5)) (type: double) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: UDFToDouble(substring(_col1, 5)) - type: double - tag: 0 - y:src + Map-reduce partition columns: _col0 (type: string), UDFToDouble(substring(_col1, 5)) (type: double) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: (substring(_col1, 5) + 1) - type: double + key expressions: _col0 (type: string), (substring(_col1, 5) + 1) (type: double) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: (substring(_col1, 5) + 1) - type: double - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string), (substring(_col1, 5) + 1) (type: double) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -971,23 +731,18 @@ STAGE PLANS: 1 {VALUE._col0} {VALUE._col1} handleSkewJoin: true outputColumnNames: _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col2 (type: string), _col3 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -998,8 +753,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - 0 + Map Operator Tree: TableScan Map Join Operator condition map: @@ -1007,29 +761,19 @@ STAGE PLANS: condition expressions: 0 1 {1_VALUE_0} {1_VALUE_1} - handleSkewJoin: false keys: - 0 [Column[joinkey0], Column[joinkey1]] - 1 [Column[joinkey0], Column[joinkey1]] + 0 joinkey0 (type: string), joinkey1 (type: double) + 1 joinkey0 (type: string), joinkey1 (type: double) outputColumnNames: _col2, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col2 (type: string), _col3 (type: string) outputColumnNames: _col2, _col3 Group By Operator - aggregations: - expr: sum(hash(_col2)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col2)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1046,35 +790,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1129,9 +863,6 @@ ON src1.c1 = src3.c5 AND src3.c5 < 80 POSTHOOK: type: QUERY POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c2)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c4)))) src2) (AND (= (. (TOK_TABLE_OR_COL src1) c1) (. (TOK_TABLE_OR_COL src2) c3)) (< (. (TOK_TABLE_OR_COL src1) c1) 100))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) c5) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) c6)))) src3) (AND (= (. (TOK_TABLE_OR_COL src1) c1) (. (TOK_TABLE_OR_COL src3) c5)) (< (. (TOK_TABLE_OR_COL src3) c5) 80)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL src1) c1)))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL src2) c4))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-5, Stage-6, Stage-2 @@ -1143,78 +874,54 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 80) and (key < 100)) - type: boolean + predicate: ((key < 100) and (key < 80)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - src2:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 100) and (key < 80)) - type: boolean + predicate: ((key < 80) and (key < 100)) (type: boolean) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col1 - type: string - src3:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 100) and (key < 80)) - type: boolean + predicate: ((key < 100) and (key < 80)) (type: boolean) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 2 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 601 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1226,23 +933,18 @@ STAGE PLANS: 2 handleSkewJoin: true outputColumnNames: _col0, _col3 + Statistics: Num rows: 13 Data size: 1322 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col3 + Statistics: Num rows: 13 Data size: 1322 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1253,8 +955,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - 0 + Map Operator Tree: TableScan Map Join Operator condition map: @@ -1264,30 +965,20 @@ STAGE PLANS: 0 {0_VALUE_0} 1 {1_VALUE_0} 2 - handleSkewJoin: false keys: - 0 [Column[joinkey0]] - 1 [Column[joinkey0]] - 2 [Column[joinkey0]] + 0 joinkey0 (type: string) + 1 joinkey0 (type: string) + 2 joinkey0 (type: string) outputColumnNames: _col0, _col3 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col3 Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1309,35 +1000,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1345,8 +1026,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - 1 + Map Operator Tree: TableScan Map Join Operator condition map: @@ -1356,30 +1036,20 @@ STAGE PLANS: 0 {0_VALUE_0} 1 {1_VALUE_0} 2 - handleSkewJoin: false keys: - 0 [Column[joinkey0]] - 1 [Column[joinkey0]] - 2 [Column[joinkey0]] + 0 joinkey0 (type: string) + 1 joinkey0 (type: string) + 2 joinkey0 (type: string) outputColumnNames: _col0, _col3 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col3 Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col3)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col3)) mode: hash outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1438,9 +1108,6 @@ SELECT /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) FROM T1 k LEFT OUTER POSTHOOK: type: QUERY POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME T1) k) (TOK_TABREF (TOK_TABNAME T1) v) (= (+ (. (TOK_TABLE_OR_COL k) key) 1) (. (TOK_TABLE_OR_COL v) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST v))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL k) key)))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL v) val))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1448,44 +1115,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - k + Map Operator Tree: TableScan alias: k + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 {key} 1 {val} - handleSkewJoin: false keys: - 0 [GenericUDFOPPlus(Column[key], Const int 1)] - 1 [GenericUDFBridge(Column[key])] + 0 (key + 1) (type: double) + 1 UDFToDouble(key) (type: double) outputColumnNames: _col0, _col5 - Position of Big Table: 0 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col5 (type: string) outputColumnNames: _col0, _col5 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col5)) - bucketGroup: false + aggregations: sum(hash(_col0)), sum(hash(_col5)) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -1496,24 +1153,20 @@ STAGE PLANS: v TableScan alias: v + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: sum(VALUE._col1) - bucketGroup: false + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/skewjoin_noskew.q.out ql/src/test/results/clientpositive/skewjoin_noskew.q.out index 608fac5..e8134ad 100644 --- ql/src/test/results/clientpositive/skewjoin_noskew.q.out +++ ql/src/test/results/clientpositive/skewjoin_noskew.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: query: explain create table noskew as select a.* from src a join src b on a.key=b.key order by a.key limit 30 POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME noskew) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 30)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-6 depends on stages: Stage-1 , consists of Stage-5, Stage-2 @@ -19,36 +16,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -58,16 +43,13 @@ STAGE PLANS: 1 handleSkewJoin: true outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -78,8 +60,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - 0 + Map Operator Tree: TableScan Map Join Operator condition map: @@ -87,22 +68,15 @@ STAGE PLANS: condition expressions: 0 {0_VALUE_0} {0_VALUE_1} 1 - handleSkewJoin: false keys: - 0 [Column[joinkey0]] - 1 [Column[joinkey0]] + 0 joinkey0 (type: string) + 1 joinkey0 (type: string) outputColumnNames: _col0, _col1 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -119,26 +93,22 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 30 + Statistics: Num rows: 30 Data size: 3030 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 30 Data size: 3030 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -155,12 +125,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: key string, value string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: noskew - isExternal: false Stage: Stage-3 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/skewjoinopt1.q.out ql/src/test/results/clientpositive/skewjoinopt1.q.out index c98a44b..2acf4ff 100644 --- ql/src/test/results/clientpositive/skewjoinopt1.q.out +++ ql/src/test/results/clientpositive/skewjoinopt1.q.out @@ -36,9 +36,6 @@ POSTHOOK: query: -- a simple join query with skew on both the tables on the join EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -48,49 +45,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((key = '2') or (key = '3'))) - type: boolean + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((key = '2') or (key = '3'))) - type: boolean + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -98,22 +77,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -121,27 +92,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -149,49 +122,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((key = '2') or (key = '3')) - type: boolean + predicate: ((key = '2') or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((key = '2') or (key = '3')) - type: boolean + predicate: ((key = '2') or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -199,22 +154,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -252,9 +199,6 @@ POSTHOOK: query: -- test outer joins also EXPLAIN SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -264,49 +208,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((key = '2') or (key = '3'))) - type: boolean + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((key = '2') or (key = '3'))) - type: boolean + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -314,22 +240,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -337,27 +255,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -365,49 +285,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((key = '2') or (key = '3')) - type: boolean + predicate: ((key = '2') or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((key = '2') or (key = '3')) - type: boolean + predicate: ((key = '2') or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -415,22 +317,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -470,9 +364,6 @@ POSTHOOK: query: -- an aggregation at the end should not change anything EXPLAIN SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -482,39 +373,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((key = '2') or (key = '3'))) - type: boolean + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((key = '2') or (key = '3'))) - type: boolean + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -522,11 +403,11 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -534,56 +415,50 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint -#### A masked pattern was here #### + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -591,39 +466,29 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((key = '2') or (key = '3')) - type: boolean + predicate: ((key = '2') or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((key = '2') or (key = '3')) - type: boolean + predicate: ((key = '2') or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -631,11 +496,11 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -662,9 +527,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -674,39 +536,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((key = '2') or (key = '3'))) - type: boolean + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((key = '2') or (key = '3'))) - type: boolean + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -714,11 +566,11 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -726,56 +578,50 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint -#### A masked pattern was here #### + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -783,39 +629,29 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((key = '2') or (key = '3')) - type: boolean + predicate: ((key = '2') or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((key = '2') or (key = '3')) - type: boolean + predicate: ((key = '2') or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -823,11 +659,11 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt10.q.out ql/src/test/results/clientpositive/skewjoinopt10.q.out index 6ab0976..95d4b32 100644 --- ql/src/test/results/clientpositive/skewjoinopt10.q.out +++ ql/src/test/results/clientpositive/skewjoinopt10.q.out @@ -42,9 +42,6 @@ select * from (select a.key as key, b.value as array_val from T1 a join array_va POSTHOOK: type: QUERY POSTHOOK: Lineage: array_valued_t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: array_valued_t1.value EXPRESSION [(t1)t1.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_TABLE_OR_COL array_val)) val (TOK_TABALIAS c))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME array_valued_T1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) array_val)))) i))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -54,45 +51,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '8')) - type: boolean + predicate: (not (key = '8')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string) TableScan alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (not (key = '8')) - type: boolean + predicate: (not (key = '8')) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: value - type: array + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: array) Reduce Operator Tree: Join Operator condition map: @@ -100,18 +83,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col5 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: array + expressions: _col0 (type: string), _col5 (type: array) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -119,105 +98,95 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: array - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: _col1 - type: array + expressions: _col1 (type: array) outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: array - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: array - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: _col1 - type: array + expressions: _col1 (type: array) outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: array - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -225,45 +194,31 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - i:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '8') - type: boolean + predicate: (key = '8') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - i:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string) TableScan alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = '8') - type: boolean + predicate: (key = '8') (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: value - type: array + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: array) Reduce Operator Tree: Join Operator condition map: @@ -271,18 +226,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col5 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: array + expressions: _col0 (type: string), _col5 (type: array) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt11.q.out ql/src/test/results/clientpositive/skewjoinopt11.q.out index 1b5f013..0495d1e 100644 --- ql/src/test/results/clientpositive/skewjoinopt11.q.out +++ ql/src/test/results/clientpositive/skewjoinopt11.q.out @@ -48,9 +48,6 @@ select * from select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key ) subq1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) val) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) val) val2)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) val) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) val) val2))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-5 @@ -64,47 +61,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '2')) - type: boolean + predicate: (not (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '2')) - type: boolean + predicate: (not (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -112,20 +93,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -133,27 +108,27 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -161,41 +136,31 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -203,47 +168,31 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:subq1-subquery2:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '2') - type: boolean + predicate: (key = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - null-subquery2:subq1-subquery2:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '2') - type: boolean + predicate: (key = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -251,20 +200,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -272,47 +215,31 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:subq1-subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '2') - type: boolean + predicate: (key = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - null-subquery1:subq1-subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '2') - type: boolean + predicate: (key = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -320,20 +247,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -341,27 +262,27 @@ STAGE PLANS: Stage: Stage-8 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -369,47 +290,31 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - subquery2:a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '2')) - type: boolean + predicate: (not (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery2:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '2')) - type: boolean + predicate: (not (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -417,20 +322,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt12.q.out ql/src/test/results/clientpositive/skewjoinopt12.q.out index 28c4c10..a68543c 100644 --- ql/src/test/results/clientpositive/skewjoinopt12.q.out +++ ql/src/test/results/clientpositive/skewjoinopt12.q.out @@ -38,9 +38,6 @@ POSTHOOK: query: -- Both the join tables are skewed by 2 keys, and one of the sk EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) val) (. (TOK_TABLE_OR_COL b) val))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -50,57 +47,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13')))) - type: boolean + predicate: (not ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13')))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13')))) - type: boolean + predicate: (not ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13')))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -108,22 +79,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -131,27 +94,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -159,57 +124,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13'))) - type: boolean + predicate: ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13'))) - type: boolean + predicate: ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -217,22 +156,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt13.q.out ql/src/test/results/clientpositive/skewjoinopt13.q.out index bcacda0..c74edf6 100644 --- ql/src/test/results/clientpositive/skewjoinopt13.q.out +++ ql/src/test/results/clientpositive/skewjoinopt13.q.out @@ -59,9 +59,6 @@ from T1 a join T2 b on a.key = b.key join T3 c on a.val = c.val POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME T3) c) (= (. (TOK_TABLE_OR_COL a) val) (. (TOK_TABLE_OR_COL c) val)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -70,41 +67,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -112,11 +93,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -124,44 +104,23 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - c + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + value expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string) TableScan alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: val - type: string + key expressions: val (type: string) sort order: + - Map-reduce partition columns: - expr: val - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -169,26 +128,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt14.q.out ql/src/test/results/clientpositive/skewjoinopt14.q.out index 5193134..f2791ca 100644 --- ql/src/test/results/clientpositive/skewjoinopt14.q.out +++ ql/src/test/results/clientpositive/skewjoinopt14.q.out @@ -63,9 +63,6 @@ from T1 a join T2 b on a.key = b.key join T3 c on a.val = c.val POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME T3) c) (= (. (TOK_TABLE_OR_COL a) val) (. (TOK_TABLE_OR_COL c) val)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -75,49 +72,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '2')) - type: boolean + predicate: (not (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '2')) - type: boolean + predicate: (not (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -125,11 +104,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -137,70 +115,40 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - c - TableScan - alias: c - Reduce Output Operator - key expressions: - expr: val - type: string - sort order: + - Map-reduce partition columns: - expr: val - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col0 - type: string - expr: _col1 - type: string -#### A masked pattern was here #### + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string) TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string) + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -208,26 +156,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -235,49 +172,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '2') - type: boolean + predicate: (key = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '2') - type: boolean + predicate: (key = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -285,11 +204,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt15.q.out ql/src/test/results/clientpositive/skewjoinopt15.q.out index 79e8dde..58ba052 100644 --- ql/src/test/results/clientpositive/skewjoinopt15.q.out +++ ql/src/test/results/clientpositive/skewjoinopt15.q.out @@ -82,9 +82,6 @@ POSTHOOK: Lineage: t1.key EXPRESSION [(tmpt1)tmpt1.FieldSchema(name:key, type:st POSTHOOK: Lineage: t1.val SIMPLE [(tmpt1)tmpt1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key EXPRESSION [(tmpt2)tmpt2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(tmpt2)tmpt2.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -94,49 +91,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (not ((key = 2) or (key = 3))) - type: boolean + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: int), val (type: string) TableScan alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (not ((key = 2) or (key = 3))) - type: boolean + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - value expressions: - expr: key - type: int - expr: val - type: string + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: int), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -144,22 +123,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -167,27 +138,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -195,49 +168,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key = 2) or (key = 3)) - type: boolean + predicate: ((key = 2) or (key = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - expr: val - type: string - b + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key = 2) or (key = 3)) - type: boolean + predicate: ((key = 2) or (key = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - value expressions: - expr: key - type: int - expr: val - type: string + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -245,22 +200,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -306,9 +253,6 @@ POSTHOOK: Lineage: t1.key EXPRESSION [(tmpt1)tmpt1.FieldSchema(name:key, type:st POSTHOOK: Lineage: t1.val SIMPLE [(tmpt1)tmpt1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key EXPRESSION [(tmpt2)tmpt2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(tmpt2)tmpt2.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -318,49 +262,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (not ((key = 2) or (key = 3))) - type: boolean + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: int), val (type: string) TableScan alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (not ((key = 2) or (key = 3))) - type: boolean + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - value expressions: - expr: key - type: int - expr: val - type: string + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: int), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -368,22 +294,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -391,27 +309,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -419,49 +339,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key = 2) or (key = 3)) - type: boolean + predicate: ((key = 2) or (key = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - expr: val - type: string - b + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key = 2) or (key = 3)) - type: boolean + predicate: ((key = 2) or (key = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - value expressions: - expr: key - type: int - expr: val - type: string + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -469,22 +371,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -532,9 +426,6 @@ POSTHOOK: Lineage: t1.key EXPRESSION [(tmpt1)tmpt1.FieldSchema(name:key, type:st POSTHOOK: Lineage: t1.val SIMPLE [(tmpt1)tmpt1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key EXPRESSION [(tmpt2)tmpt2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(tmpt2)tmpt2.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -544,39 +435,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (not ((key = 2) or (key = 3))) - type: boolean + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - subquery1:b + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (not ((key = 2) or (key = 3))) - type: boolean + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -584,11 +465,11 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -596,56 +477,50 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint -#### A masked pattern was here #### + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan Union + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -653,39 +528,29 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key = 2) or (key = 3)) - type: boolean + predicate: ((key = 2) or (key = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - b + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: a + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key = 2) or (key = 3)) - type: boolean + predicate: ((key = 2) or (key = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -693,11 +558,11 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -732,9 +597,6 @@ POSTHOOK: Lineage: t1.key EXPRESSION [(tmpt1)tmpt1.FieldSchema(name:key, type:st POSTHOOK: Lineage: t1.val SIMPLE [(tmpt1)tmpt1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key EXPRESSION [(tmpt2)tmpt2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(tmpt2)tmpt2.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -744,39 +606,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (not ((key = 2) or (key = 3))) - type: boolean + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - subquery1:b + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (not ((key = 2) or (key = 3))) - type: boolean + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -784,11 +636,11 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -796,56 +648,50 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint -#### A masked pattern was here #### + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan Union + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -853,39 +699,29 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key = 2) or (key = 3)) - type: boolean + predicate: ((key = 2) or (key = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - b + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: a + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key = 2) or (key = 3)) - type: boolean + predicate: ((key = 2) or (key = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -893,11 +729,11 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt16.q.out ql/src/test/results/clientpositive/skewjoinopt16.q.out index 29aadbc..d5461d5 100644 --- ql/src/test/results/clientpositive/skewjoinopt16.q.out +++ ql/src/test/results/clientpositive/skewjoinopt16.q.out @@ -38,9 +38,6 @@ POSTHOOK: query: -- One of the tables is skewed by 2 columns, and the other tabl EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) val) (. (TOK_TABLE_OR_COL b) val))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -50,57 +47,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (((key = '2') and (val = '12')) or (key = '3'))) - type: boolean + predicate: (not (((key = '2') and (val = '12')) or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (((key = '2') and (val = '12')) or (key = '3'))) - type: boolean + predicate: (not (((key = '2') and (val = '12')) or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -108,22 +79,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -131,27 +94,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -159,57 +124,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (((key = '2') and (val = '12')) or (key = '3')) - type: boolean + predicate: (((key = '2') and (val = '12')) or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (((key = '2') and (val = '12')) or (key = '3')) - type: boolean + predicate: (((key = '2') and (val = '12')) or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -217,22 +156,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt17.q.out ql/src/test/results/clientpositive/skewjoinopt17.q.out index 49c5a4d..d293c12 100644 --- ql/src/test/results/clientpositive/skewjoinopt17.q.out +++ ql/src/test/results/clientpositive/skewjoinopt17.q.out @@ -42,9 +42,6 @@ POSTHOOK: query: -- One of the tables is skewed by 2 columns, and the other tabl EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -54,49 +51,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '2')) - type: boolean + predicate: (not (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '2')) - type: boolean + predicate: (not (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -104,22 +83,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -127,27 +98,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -155,49 +128,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '2') - type: boolean + predicate: (key = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '2') - type: boolean + predicate: (key = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -205,22 +160,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -304,9 +251,6 @@ POSTHOOK: query: -- One of the tables is skewed by 2 columns, and the other tabl EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) val) (. (TOK_TABLE_OR_COL b) val))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -316,57 +260,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (((key = '2') and (val = '12')) or (key = '2'))) - type: boolean + predicate: (not (((key = '2') and (val = '12')) or (key = '2'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (((key = '2') and (val = '12')) or (key = '2'))) - type: boolean + predicate: (not (((key = '2') and (val = '12')) or (key = '2'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -374,22 +292,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -397,27 +307,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -425,57 +337,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (((key = '2') and (val = '12')) or (key = '2')) - type: boolean + predicate: (((key = '2') and (val = '12')) or (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (((key = '2') and (val = '12')) or (key = '2')) - type: boolean + predicate: (((key = '2') and (val = '12')) or (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -483,22 +369,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt18.q.out ql/src/test/results/clientpositive/skewjoinopt18.q.out index d2da6a4..d8dadc8 100644 --- ql/src/test/results/clientpositive/skewjoinopt18.q.out +++ ql/src/test/results/clientpositive/skewjoinopt18.q.out @@ -67,9 +67,6 @@ SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Lineage: t1.key EXPRESSION [(tmpt1)tmpt1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(tmpt1)tmpt1.FieldSchema(name:val, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -77,41 +74,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(key) - type: double + key expressions: UDFToDouble(key) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(key) - type: double - tag: 0 - value expressions: - expr: key - type: int - expr: val - type: string - b + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(key) - type: double + key expressions: UDFToDouble(key) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(key) - type: double - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -119,22 +100,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt19.q.out ql/src/test/results/clientpositive/skewjoinopt19.q.out index 12bc36c..2790a0a 100644 --- ql/src/test/results/clientpositive/skewjoinopt19.q.out +++ ql/src/test/results/clientpositive/skewjoinopt19.q.out @@ -40,9 +40,6 @@ POSTHOOK: query: -- add a test where the skewed key is also the bucketized key EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -52,49 +49,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '2')) - type: boolean + predicate: (not (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '2')) - type: boolean + predicate: (not (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -102,22 +81,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -125,27 +96,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -153,49 +126,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '2') - type: boolean + predicate: (key = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '2') - type: boolean + predicate: (key = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -203,22 +158,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt2.q.out ql/src/test/results/clientpositive/skewjoinopt2.q.out index 8c52c68..5a0ff80 100644 --- ql/src/test/results/clientpositive/skewjoinopt2.q.out +++ ql/src/test/results/clientpositive/skewjoinopt2.q.out @@ -42,9 +42,6 @@ POSTHOOK: query: -- a simple query with skew on both the tables on the join key EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) val) (. (TOK_TABLE_OR_COL b) val))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -54,57 +51,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) - type: boolean + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) - type: boolean + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -112,22 +83,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -135,27 +98,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -163,57 +128,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) - type: boolean + predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) - type: boolean + predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -221,22 +160,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -271,9 +202,6 @@ POSTHOOK: query: -- test outer joins also EXPLAIN SELECT a.*, b.* FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) val) (. (TOK_TABLE_OR_COL b) val))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -283,57 +211,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) - type: boolean + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) - type: boolean + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -341,22 +243,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -364,27 +258,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -392,57 +288,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) - type: boolean + predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) - type: boolean + predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -450,22 +320,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -504,9 +366,6 @@ POSTHOOK: query: -- a group by at the end should not change anything EXPLAIN SELECT a.key, count(1) FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val group by a.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) val) (. (TOK_TABLE_OR_COL b) val))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -516,50 +375,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) - type: boolean + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 0 - value expressions: - expr: key - type: string - subquery1:b + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) - type: boolean + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 1 + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -567,16 +406,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -584,79 +421,57 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint -#### A masked pattern was here #### + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -664,50 +479,30 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) - type: boolean + predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 0 - value expressions: - expr: key - type: string - b + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) - type: boolean + predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 1 + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -715,16 +510,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -752,9 +545,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT a.key, count(1) FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val group by a.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) val) (. (TOK_TABLE_OR_COL b) val))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -764,50 +554,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) - type: boolean + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 0 - value expressions: - expr: key - type: string - subquery1:b + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) - type: boolean + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 1 + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -815,16 +585,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -832,79 +600,57 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint -#### A masked pattern was here #### + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -912,50 +658,30 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) - type: boolean + predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 0 - value expressions: - expr: key - type: string - b + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) - type: boolean + predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: val - type: string + key expressions: key (type: string), val (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: val - type: string - tag: 1 + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -963,16 +689,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt20.q.out ql/src/test/results/clientpositive/skewjoinopt20.q.out index 4911bf5..a616197 100644 --- ql/src/test/results/clientpositive/skewjoinopt20.q.out +++ ql/src/test/results/clientpositive/skewjoinopt20.q.out @@ -40,9 +40,6 @@ POSTHOOK: query: -- add a test where the skewed key is also the bucketized/sorte EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -52,49 +49,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '2')) - type: boolean + predicate: (not (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '2')) - type: boolean + predicate: (not (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -102,22 +81,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -125,27 +96,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -153,49 +126,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '2') - type: boolean + predicate: (key = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '2') - type: boolean + predicate: (key = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -203,22 +158,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt3.q.out ql/src/test/results/clientpositive/skewjoinopt3.q.out index eeeb8b8..0c78342 100644 --- ql/src/test/results/clientpositive/skewjoinopt3.q.out +++ ql/src/test/results/clientpositive/skewjoinopt3.q.out @@ -40,9 +40,6 @@ POSTHOOK: query: -- a simple query with skew on both the tables. One of the skew EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -52,49 +49,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (((key = '2') or (key = '8')) or (key = '3'))) - type: boolean + predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (((key = '2') or (key = '8')) or (key = '3'))) - type: boolean + predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -102,22 +81,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -125,27 +96,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -153,49 +126,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (((key = '2') or (key = '8')) or (key = '3')) - type: boolean + predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (((key = '2') or (key = '8')) or (key = '3')) - type: boolean + predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -203,22 +158,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -256,9 +203,6 @@ POSTHOOK: query: -- test outer joins also EXPLAIN SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -268,49 +212,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (((key = '2') or (key = '8')) or (key = '3'))) - type: boolean + predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (((key = '2') or (key = '8')) or (key = '3'))) - type: boolean + predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -318,22 +244,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -341,27 +259,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -369,49 +289,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (((key = '2') or (key = '8')) or (key = '3')) - type: boolean + predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (((key = '2') or (key = '8')) or (key = '3')) - type: boolean + predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -419,22 +321,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt4.q.out ql/src/test/results/clientpositive/skewjoinopt4.q.out index 438ae6e..ae8d5ce 100644 --- ql/src/test/results/clientpositive/skewjoinopt4.q.out +++ ql/src/test/results/clientpositive/skewjoinopt4.q.out @@ -36,9 +36,6 @@ POSTHOOK: query: -- only of the tables of the join (the left table of the join) EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -48,49 +45,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '2')) - type: boolean + predicate: (not (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '2')) - type: boolean + predicate: (not (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -98,22 +77,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -121,27 +92,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -149,49 +122,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '2') - type: boolean + predicate: (key = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '2') - type: boolean + predicate: (key = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -199,22 +154,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -250,9 +197,6 @@ POSTHOOK: query: -- the order of the join should not matter, just confirming EXPLAIN SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T2) a) (TOK_TABREF (TOK_TABNAME T1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -262,49 +206,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '2')) - type: boolean + predicate: (not (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (key = '2')) - type: boolean + predicate: (not (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -312,22 +238,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -335,27 +253,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -363,49 +283,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '2') - type: boolean + predicate: (key = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (key = '2') - type: boolean + predicate: (key = '2') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -413,22 +315,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt5.q.out ql/src/test/results/clientpositive/skewjoinopt5.q.out index af67597..ab8d169 100644 --- ql/src/test/results/clientpositive/skewjoinopt5.q.out +++ ql/src/test/results/clientpositive/skewjoinopt5.q.out @@ -38,9 +38,6 @@ POSTHOOK: query: -- One of the tables is skewed by 2 columns, and the other tabl EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -50,49 +47,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((key = '2') or (key = '3'))) - type: boolean + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((key = '2') or (key = '3'))) - type: boolean + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -100,22 +79,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -123,27 +94,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -151,49 +124,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((key = '2') or (key = '3')) - type: boolean + predicate: ((key = '2') or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((key = '2') or (key = '3')) - type: boolean + predicate: ((key = '2') or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -201,22 +156,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt6.q.out ql/src/test/results/clientpositive/skewjoinopt6.q.out index 864564f..4da0473 100644 --- ql/src/test/results/clientpositive/skewjoinopt6.q.out +++ ql/src/test/results/clientpositive/skewjoinopt6.q.out @@ -40,9 +40,6 @@ POSTHOOK: query: -- Both the join tables are skewed by 2 keys, and one of the sk EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -52,49 +49,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (((key = '2') or (key = '8')) or (key = '3'))) - type: boolean + predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (((key = '2') or (key = '8')) or (key = '3'))) - type: boolean + predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -102,22 +81,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -125,27 +96,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -153,49 +126,31 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (((key = '2') or (key = '8')) or (key = '3')) - type: boolean + predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (((key = '2') or (key = '8')) or (key = '3')) - type: boolean + predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -203,22 +158,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt7.q.out ql/src/test/results/clientpositive/skewjoinopt7.q.out index aaaf40c..9262dba 100644 --- ql/src/test/results/clientpositive/skewjoinopt7.q.out +++ ql/src/test/results/clientpositive/skewjoinopt7.q.out @@ -51,9 +51,6 @@ POSTHOOK: query: -- This test is for validating skewed join compile time optimiz EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME T3) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME c)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-5 @@ -63,70 +60,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan - alias: a + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (((key = '2') or (key = '8')) or (key = '3'))) - type: boolean + predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (((key = '2') or (key = '8')) or (key = '3'))) - type: boolean + predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:c + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: c + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not (((key = '2') or (key = '8')) or (key = '3'))) - type: boolean + predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -136,26 +106,14 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -163,27 +121,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -191,70 +151,43 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (((key = '2') or (key = '8')) or (key = '3')) - type: boolean + predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (((key = '2') or (key = '8')) or (key = '3')) - type: boolean + predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string - c + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: c + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (((key = '2') or (key = '8')) or (key = '3')) - type: boolean + predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -264,26 +197,14 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt8.q.out ql/src/test/results/clientpositive/skewjoinopt8.q.out index ca5a61d..d051e27 100644 --- ql/src/test/results/clientpositive/skewjoinopt8.q.out +++ ql/src/test/results/clientpositive/skewjoinopt8.q.out @@ -49,9 +49,6 @@ POSTHOOK: query: -- This test is for validating skewed join compile time optimiz EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME T3) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME c)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-5 @@ -61,70 +58,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subquery1:a + Map Operator Tree: TableScan - alias: a + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((key = '3') or (key = '8'))) - type: boolean + predicate: (not ((key = '3') or (key = '8'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((key = '3') or (key = '8'))) - type: boolean + predicate: (not ((key = '3') or (key = '8'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string - subquery1:c + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: c + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (not ((key = '3') or (key = '8'))) - type: boolean + predicate: (not ((key = '3') or (key = '8'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -134,26 +104,14 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -161,27 +119,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -189,70 +149,43 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((key = '3') or (key = '8')) - type: boolean + predicate: ((key = '3') or (key = '8')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: val - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: b + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((key = '3') or (key = '8')) - type: boolean + predicate: ((key = '3') or (key = '8')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string - c + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan - alias: c + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: ((key = '3') or (key = '8')) - type: boolean + predicate: ((key = '3') or (key = '8')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -262,26 +195,14 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col8 (type: string), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/skewjoinopt9.q.out ql/src/test/results/clientpositive/skewjoinopt9.q.out index c1a82f1..3e3e2cb 100644 --- ql/src/test/results/clientpositive/skewjoinopt9.q.out +++ ql/src/test/results/clientpositive/skewjoinopt9.q.out @@ -46,9 +46,6 @@ select key, val from T1 ) subq1 join T2 b on subq1.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)))))) subq1) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -56,74 +53,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string - null-subquery1:subq1-subquery1:t1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string), val (type: string) TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Union + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - null-subquery2:subq1-subquery2:t1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: val - type: string + expressions: key (type: string), val (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Union + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -131,22 +100,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 66 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 66 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 66 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -210,9 +172,6 @@ select key, count(1) as cnt from T1 group by key ) subq1 join T2 b on subq1.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq1) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -221,56 +180,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -278,40 +220,23 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - b + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: val - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -319,22 +244,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin9.q.out ql/src/test/results/clientpositive/smb_mapjoin9.q.out index c3302a0..1b6bec3 100644 --- ql/src/test/results/clientpositive/smb_mapjoin9.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin9.q.out @@ -23,7 +23,81 @@ hive_test_smb_bucket2 b ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME hive_test_smb_bucket1) a) (TOK_TABREF (TOK_TABNAME hive_test_smb_bucket2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) ds)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) k2)) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL a) ds) '2010-10-15') (= (. (TOK_TABLE_OR_COL b) ds) '2010-10-15')) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) key)))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + hive_test_smb_bucket1 + a + TOK_TABREF + TOK_TABNAME + hive_test_smb_bucket2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + k1 + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + ds + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + k2 + TOK_WHERE + and + and + = + . + TOK_TABLE_OR_COL + a + ds + '2010-10-15' + = + . + TOK_TABLE_OR_COL + b + ds + '2010-10-15' + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + b + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -32,63 +106,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a - TableScan - alias: a - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: ((ds = '2010-10-15') and key is not null) - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {key} {value} {ds} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col5, _col6, _col7 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col5 - type: int - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col0 - type: int - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types int:string:string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -123,7 +140,81 @@ hive_test_smb_bucket2 b ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME hive_test_smb_bucket1) a) (TOK_TABREF (TOK_TABNAME hive_test_smb_bucket2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) ds)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) k2)) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL a) ds) '2010-10-15') (= (. (TOK_TABLE_OR_COL b) ds) '2010-10-15')) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) key)))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + hive_test_smb_bucket1 + a + TOK_TABREF + TOK_TABNAME + hive_test_smb_bucket2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + k1 + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + ds + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + k2 + TOK_WHERE + and + and + = + . + TOK_TABLE_OR_COL + a + ds + '2010-10-15' + = + . + TOK_TABLE_OR_COL + b + ds + '2010-10-15' + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + b + key + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -132,63 +223,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b - TableScan - alias: b - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: ((ds = '2010-10-15') and key is not null) - type: boolean - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {key} {value} {ds} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col5, _col6, _col7 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col5 - type: int - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col0 - type: int - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types int:string:string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -250,9 +284,6 @@ POSTHOOK: Lineage: hive_test_smb_bucket1 PARTITION(ds=2010-10-15).key EXPRESSION POSTHOOK: Lineage: hive_test_smb_bucket1 PARTITION(ds=2010-10-15).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: hive_test_smb_bucket2 PARTITION(ds=2010-10-15).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: hive_test_smb_bucket2 PARTITION(ds=2010-10-15).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME smb_mapjoin9_results) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME hive_test_smb_bucket1) a) (TOK_TABREF (TOK_TABNAME hive_test_smb_bucket2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) ds)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) k2)) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL a) ds) '2010-10-15') (= (. (TOK_TABLE_OR_COL b) ds) '2010-10-15')) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) key))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 @@ -267,40 +298,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: key is not null - type: boolean + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 {key} {value} {ds} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col5, _col6, _col7 - Position of Big Table: 0 Select Operator - expressions: - expr: _col5 - type: int - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col0 - type: int + expressions: _col5 (type: int), _col6 (type: string), _col7 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -326,24 +345,19 @@ STAGE PLANS: Create Table Operator: Create Table columns: k1 int, value string, ds string, k2 int - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: smb_mapjoin9_results - isExternal: false Stage: Stage-3 Stats-Aggr Operator Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -352,12 +366,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_1.q.out ql/src/test/results/clientpositive/smb_mapjoin_1.q.out index a0bcb53..cff3b39 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_1.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_1.q.out @@ -37,9 +37,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -47,36 +44,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -102,9 +88,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -112,36 +95,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -172,9 +144,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -182,36 +151,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Right Outer Join0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -241,9 +199,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -251,36 +206,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Outer Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -315,9 +259,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -325,36 +266,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -380,9 +310,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -390,36 +317,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -450,9 +366,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -460,36 +373,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Right Outer Join0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -519,9 +421,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -529,36 +428,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Outer Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_10.q.out ql/src/test/results/clientpositive/smb_mapjoin_10.q.out index 523ee7c..ea29bac 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_10.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_10.q.out @@ -61,9 +61,6 @@ on (a.ds = '1' and b.ds = '2' and a.postid = b.postid and a.type = b.type) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tmp_smb_bucket_10) a) (TOK_TABREF (TOK_TABNAME tmp_smb_bucket_10) b) (and (and (and (and (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '2')) (= (. (TOK_TABLE_OR_COL a) userid) (. (TOK_TABLE_OR_COL b) userid))) (= (. (TOK_TABLE_OR_COL a) pageid) (. (TOK_TABLE_OR_COL b) pageid))) (= (. (TOK_TABLE_OR_COL a) postid) (. (TOK_TABLE_OR_COL b) postid))) (= (. (TOK_TABLE_OR_COL a) type) (. (TOK_TABLE_OR_COL b) type))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -71,48 +68,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 3 Data size: 414 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {userid} {pageid} {postid} {type} {ds} 1 {userid} {pageid} {postid} {type} {ds} - handleSkewJoin: false keys: - 0 [Column[userid], Column[pageid], Column[postid], Column[type]] - 1 [Column[userid], Column[pageid], Column[postid], Column[type]] + 0 userid (type: int), pageid (type: int), postid (type: int), type (type: string) + 1 userid (type: int), pageid (type: int), postid (type: int), type (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col8, _col9, _col10, _col11 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: int - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col7 - type: int - expr: _col8 - type: int - expr: _col9 - type: int - expr: _col10 - type: string - expr: _col11 - type: string + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_11.q.out ql/src/test/results/clientpositive/smb_mapjoin_11.q.out index f40ccd0..5c7a6db 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_11.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_11.q.out @@ -56,7 +56,67 @@ POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchem POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) ds) '1')) (= (. (TOK_TABLE_OR_COL b) ds) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table1 + a + TOK_TABREF + TOK_TABNAME + test_table2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + ds + '1' + = + . + TOK_TABLE_OR_COL + b + ds + '1' + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + test_table3 + TOK_PARTSPEC + TOK_PARTVAL + ds + '1' + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -66,12 +126,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -79,18 +137,13 @@ STAGE PLANS: condition expressions: 0 {key} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col6 Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col6 - type: string + expressions: _col0 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/smb_mapjoin_12.q.out ql/src/test/results/clientpositive/smb_mapjoin_12.q.out index 97df6e7..0ddbe16 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_12.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_12.q.out @@ -76,7 +76,67 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchem POSTHOOK: Lineage: test_table2 PARTITION(ds=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) ds) '1')) (>= (. (TOK_TABLE_OR_COL b) ds) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table1 + a + TOK_TABREF + TOK_TABNAME + test_table2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + ds + '1' + >= + . + TOK_TABLE_OR_COL + b + ds + '1' + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + test_table3 + TOK_PARTSPEC + TOK_PARTVAL + ds + '1' + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -86,12 +146,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -99,18 +157,13 @@ STAGE PLANS: condition expressions: 0 {key} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col6 Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col6 - type: string + expressions: _col0 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false @@ -296,7 +349,73 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=3).value SIMPLE [(src)src.FieldSchem POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table3) a) (TOK_TABREF (TOK_TABNAME test_table1) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) ds) '1')) (= (. (TOK_TABLE_OR_COL b) ds) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '2')))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table3 + a + TOK_TABREF + TOK_TABNAME + test_table1 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + ds + '1' + = + . + TOK_TABLE_OR_COL + b + ds + '1' + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + test_table3 + TOK_PARTSPEC + TOK_PARTVAL + ds + '2' + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + TOK_FUNCTION + concat + . + TOK_TABLE_OR_COL + a + value + . + TOK_TABLE_OR_COL + b + value + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -306,12 +425,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 3084 dataSize: 32904 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 3084 Data size: 32904 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -319,18 +436,13 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col6 Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: concat(_col1, _col6) - type: string + expressions: _col0 (type: int), concat(_col1, _col6) (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/smb_mapjoin_13.q.out ql/src/test/results/clientpositive/smb_mapjoin_13.q.out index ace114f..efc781d 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_13.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_13.q.out @@ -73,7 +73,48 @@ POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, ty POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table1 + a + TOK_TABREF + TOK_TABNAME + test_table2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_ALLCOLREF + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + a + key + TOK_LIMIT + 10 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -82,12 +123,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -95,38 +134,19 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} {key} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[value]] + 0 key (type: int) + 1 value (type: int) outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -182,6 +202,7 @@ STAGE PLANS: Reduce Operator Tree: Extract Limit + Number of rows: 10 File Output Operator compressed: false GlobalTableId: 0 @@ -254,7 +275,48 @@ POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, ty POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table3) a) (TOK_TABREF (TOK_TABNAME test_table4) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table3 + a + TOK_TABREF + TOK_TABNAME + test_table4 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_ALLCOLREF + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + a + key + TOK_LIMIT + 10 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -263,12 +325,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -276,44 +336,22 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [GenericUDFBridge(Column[key])] - 1 [GenericUDFBridge(Column[value])] + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(value) (type: double) outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - Statistics: - numRows: 550 dataSize: 5843 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 550 dataSize: 5843 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Statistics: - numRows: 550 dataSize: 5843 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -324,8 +362,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -381,18 +418,16 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 550 dataSize: 5843 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Limit - Statistics: - numRows: 10 dataSize: 100 basicStatsState: COMPLETE colStatsState: NONE + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 10 dataSize: 100 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_14.q.out ql/src/test/results/clientpositive/smb_mapjoin_14.q.out index 997bdfa..3885532 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_14.q.out @@ -50,9 +50,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -60,49 +57,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 1 + 0 key (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -155,9 +140,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-3 depends on stages: Stage-1 @@ -166,68 +148,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -235,25 +193,16 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + value expressions: _col0 (type: int), _col1 (type: bigint) Reduce Operator Tree: Extract File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -319,9 +268,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -330,68 +276,46 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:subq1:b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count() + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -399,30 +323,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -483,9 +398,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq1))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -493,58 +405,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -609,9 +507,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq2))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -619,49 +514,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -750,9 +633,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq4) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL subq4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq2))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -760,58 +640,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq4:subq3:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key < 8) and (key < 6)) - type: boolean + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -890,9 +756,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq1))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -900,58 +763,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 8) - type: boolean + predicate: (key < 8) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1006,9 +855,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) 1) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) 1) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq1))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -1025,73 +871,65 @@ STAGE PLANS: subq1:a TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key + 1) - type: int + expressions: (key + 1) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 _col0 (type: int) Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq2:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key + 1) - type: int + expressions: (key + 1) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1142,9 +980,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_TABREF (TOK_TABNAME tbl2) a) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL a) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq1))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1152,49 +987,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1241,9 +1064,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_TABREF (TOK_TABNAME tbl2) a) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL a) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1251,58 +1071,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1359,9 +1165,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq3) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq1 subq2))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1369,19 +1172,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - subq3:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 6) - type: boolean + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int + expressions: key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -1390,40 +1191,28 @@ STAGE PLANS: 0 1 2 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - 2 [Column[_col0]] - Position of Big Table: 2 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1496,9 +1285,6 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq2))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) value2)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1506,49 +1292,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 1 + 0 _col0 (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_15.q.out ql/src/test/results/clientpositive/smb_mapjoin_15.q.out index f85b6c3..3392187 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_15.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_15.q.out @@ -47,7 +47,48 @@ POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, ty POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table1 + a + TOK_TABREF + TOK_TABNAME + test_table2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_ALLCOLREF + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + a + key + TOK_LIMIT + 10 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -56,12 +97,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -69,38 +108,19 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -156,6 +176,7 @@ STAGE PLANS: Reduce Operator Tree: Extract Limit + Number of rows: 10 File Output Operator compressed: false GlobalTableId: 0 @@ -292,7 +313,58 @@ POSTHOOK: Lineage: test_table2.key2 EXPRESSION [(src)src.FieldSchema(name:key, t POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table1 + a + TOK_TABREF + TOK_TABNAME + test_table2 + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + key2 + . + TOK_TABLE_OR_COL + b + key2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_ALLCOLREF + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + a + key + TOK_LIMIT + 10 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -301,12 +373,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 500 dataSize: 7218 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -314,46 +384,19 @@ STAGE PLANS: condition expressions: 0 {key} {key2} {value} 1 {key} {key2} {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[key2]] - 1 [Column[key], Column[key2]] + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7 Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col5 - type: int - expr: _col6 - type: int - expr: _col7 - type: string + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col3 - type: int - expr: _col4 - type: int - expr: _col5 - type: string + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -409,6 +452,7 @@ STAGE PLANS: Reduce Operator Tree: Extract Limit + Number of rows: 10 File Output Operator compressed: false GlobalTableId: 0 @@ -485,7 +529,58 @@ POSTHOOK: Lineage: test_table2.key2 EXPRESSION [(src)src.FieldSchema(name:key, t POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2)) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table1 + a + TOK_TABREF + TOK_TABNAME + test_table2 + b + and + = + . + TOK_TABLE_OR_COL + a + key2 + . + TOK_TABLE_OR_COL + b + key2 + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_ALLCOLREF + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + a + key + TOK_LIMIT + 10 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -494,12 +589,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 500 dataSize: 7218 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -507,46 +600,19 @@ STAGE PLANS: condition expressions: 0 {key} {key2} {value} 1 {key} {key2} {value} - handleSkewJoin: false keys: - 0 [Column[key2], Column[key]] - 1 [Column[key2], Column[key]] + 0 key2 (type: int), key (type: int) + 1 key2 (type: int), key (type: int) outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7 Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col5 - type: int - expr: _col6 - type: int - expr: _col7 - type: string + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col3 - type: int - expr: _col4 - type: int - expr: _col5 - type: string + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -602,6 +668,7 @@ STAGE PLANS: Reduce Operator Tree: Extract Limit + Number of rows: 10 File Output Operator compressed: false GlobalTableId: 0 @@ -678,7 +745,58 @@ POSTHOOK: Lineage: test_table2.key2 EXPRESSION [(src)src.FieldSchema(name:key, t POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table1 + a + TOK_TABREF + TOK_TABNAME + test_table2 + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + value + . + TOK_TABLE_OR_COL + b + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_ALLCOLREF + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + a + key + TOK_LIMIT + 10 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -687,12 +805,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 500 dataSize: 7218 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -700,53 +816,23 @@ STAGE PLANS: condition expressions: 0 {key} {key2} {value} 1 {key} {key2} {value} - handleSkewJoin: false keys: - 0 [Column[key], Column[value]] - 1 [Column[key], Column[value]] + 0 key (type: int), value (type: string) + 1 key (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7 Position of Big Table: 0 - Statistics: - numRows: 550 dataSize: 7939 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col5 - type: int - expr: _col6 - type: int - expr: _col7 - type: string + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: - numRows: 550 dataSize: 7939 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Statistics: - numRows: 550 dataSize: 7939 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col3 - type: int - expr: _col4 - type: int - expr: _col5 - type: string + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -757,8 +843,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 500 dataSize: 7218 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -821,18 +906,16 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 550 dataSize: 7939 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE Limit - Statistics: - numRows: 10 dataSize: 140 basicStatsState: COMPLETE colStatsState: NONE + Number of rows: 10 + Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 10 dataSize: 140 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_16.q.out ql/src/test/results/clientpositive/smb_mapjoin_16.q.out index 6c7e061..baef673 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_16.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_16.q.out @@ -40,9 +40,6 @@ POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, ty POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -50,49 +47,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 0 + 0 key (type: int) + 1 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_17.q.out ql/src/test/results/clientpositive/smb_mapjoin_17.q.out index 876baf5..24936fe 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_17.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_17.q.out @@ -228,9 +228,6 @@ POSTHOOK: Lineage: test_table7.key EXPRESSION [(src)src.FieldSchema(name:key, ty POSTHOOK: Lineage: test_table7.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table8.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table8.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME test_table3) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME test_table4) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key))) (TOK_TABREF (TOK_TABNAME test_table5) e) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL e) key))) (TOK_TABREF (TOK_TABNAME test_table6) f) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL f) key))) (TOK_TABREF (TOK_TABNAME test_table7) g) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL g) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b c d e f g))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -238,10 +235,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -258,44 +255,32 @@ STAGE PLANS: 4 5 6 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - 3 [Column[key]] - 4 [Column[key]] - 5 [Column[key]] - 6 [Column[key]] - Position of Big Table: 0 + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + 4 key (type: int) + 5 key (type: int) + 6 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -392,9 +377,6 @@ POSTHOOK: Lineage: test_table7.key EXPRESSION [(src)src.FieldSchema(name:key, ty POSTHOOK: Lineage: test_table7.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table8.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table8.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME test_table3) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME test_table4) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key))) (TOK_TABREF (TOK_TABNAME test_table5) e) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL e) key))) (TOK_TABREF (TOK_TABNAME test_table6) f) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL f) key))) (TOK_TABREF (TOK_TABNAME test_table7) g) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL g) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -402,10 +384,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 @@ -422,44 +404,32 @@ STAGE PLANS: 4 5 6 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - 3 [Column[key]] - 4 [Column[key]] - 5 [Column[key]] - 6 [Column[key]] - Position of Big Table: 0 + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + 4 key (type: int) + 5 key (type: int) + 6 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -554,9 +524,6 @@ POSTHOOK: Lineage: test_table7.key EXPRESSION [(src)src.FieldSchema(name:key, ty POSTHOOK: Lineage: test_table7.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table8.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table8.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME test_table3) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME test_table4) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key))) (TOK_TABREF (TOK_TABNAME test_table5) e) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL e) key))) (TOK_TABREF (TOK_TABNAME test_table6) f) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL f) key))) (TOK_TABREF (TOK_TABNAME test_table7) g) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL g) key))) (TOK_TABREF (TOK_TABNAME test_table8) h) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL h) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -564,10 +531,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 @@ -586,45 +553,33 @@ STAGE PLANS: 5 6 7 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - 3 [Column[key]] - 4 [Column[key]] - 5 [Column[key]] - 6 [Column[key]] - 7 [Column[key]] - Position of Big Table: 0 + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + 4 key (type: int) + 5 key (type: int) + 6 key (type: int) + 7 key (type: int) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -751,9 +706,6 @@ POSTHOOK: Lineage: test_table7.key EXPRESSION [(src)src.FieldSchema(name:key, ty POSTHOOK: Lineage: test_table7.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table8.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table8.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME test_table3) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME test_table4) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key))) (TOK_TABREF (TOK_TABNAME test_table5) e) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL e) key))) (TOK_TABREF (TOK_TABNAME test_table6) f) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL f) key))) (TOK_TABREF (TOK_TABNAME test_table7) g) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL g) key))) (TOK_TABREF (TOK_TABNAME test_table8) h) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL h) key))) (TOK_TABREF (TOK_TABNAME test_table4) i) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL i) key))) (TOK_TABREF (TOK_TABNAME test_table5) j) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL j) key))) (TOK_TABREF (TOK_TABNAME test_table6) k) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL k) key))) (TOK_TABREF (TOK_TABNAME test_table7) l) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL l) key))) (TOK_TABREF (TOK_TABNAME test_table8) m) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL m) key))) (TOK_TABREF (TOK_TABNAME test_table7) n) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL n) key))) (TOK_TABREF (TOK_TABNAME test_table8) o) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL o) key))) (TOK_TABREF (TOK_TABNAME test_table4) p) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL p) key))) (TOK_TABREF (TOK_TABNAME test_table5) q) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL q) key))) (TOK_TABREF (TOK_TABNAME test_table6) r) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL r) key))) (TOK_TABREF (TOK_TABNAME test_table7) s) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL s) key))) (TOK_TABREF (TOK_TABNAME test_table8) t) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL t) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -761,10 +713,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 @@ -799,88 +751,61 @@ STAGE PLANS: 13 14 15 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - 3 [Column[key]] - 4 [Column[key]] - 5 [Column[key]] - 6 [Column[key]] - 7 [Column[key]] - 8 [Column[key]] - 9 [Column[key]] - 10 [Column[key]] - 11 [Column[key]] - 12 [Column[key]] - 13 [Column[key]] - 14 [Column[key]] - 15 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + 3 key (type: int) + 4 key (type: int) + 5 key (type: int) + 6 key (type: int) + 7 key (type: int) + 8 key (type: int) + 9 key (type: int) + 10 key (type: int) + 11 key (type: int) + 12 key (type: int) + 13 key (type: int) + 14 key (type: int) + 15 key (type: int) outputColumnNames: _col0, _col1 - Position of Big Table: 0 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - q + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) TableScan - alias: q + alias: t + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - r + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE TableScan - alias: r + alias: s + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 2 - s + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE TableScan - alias: s + alias: r + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 3 - t + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE TableScan - alias: t + alias: q + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 4 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -894,18 +819,12 @@ STAGE PLANS: 2 3 4 - handleSkewJoin: false outputColumnNames: _col24, _col25 Select Operator - expressions: - expr: _col24 - type: int - expr: _col25 - type: string + expressions: _col24 (type: int), _col25 (type: string) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_18.q.out ql/src/test/results/clientpositive/smb_mapjoin_18.q.out index e072cfb..4bd8566 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_18.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_18.q.out @@ -40,9 +40,6 @@ SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -51,20 +48,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -268,9 +262,6 @@ POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSch POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '2')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL a) key) 238))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -279,24 +270,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 238) - type: boolean + predicate: (key = 238) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -435,9 +422,6 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldS POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '3')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '2')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -446,20 +430,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_19.q.out ql/src/test/results/clientpositive/smb_mapjoin_19.q.out index 5541cc5..410c31c 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_19.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_19.q.out @@ -40,9 +40,6 @@ SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -51,20 +48,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_2.q.out ql/src/test/results/clientpositive/smb_mapjoin_2.q.out index 704dd3d..7ad5bcf 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_2.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_2.q.out @@ -37,9 +37,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -47,36 +44,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -104,9 +90,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -114,36 +97,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -174,9 +146,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -184,36 +153,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Right Outer Join0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -245,9 +203,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -255,36 +210,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Outer Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -319,9 +263,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -329,36 +270,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -386,9 +316,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -396,36 +323,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -456,9 +372,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -466,36 +379,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Right Outer Join0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -527,9 +429,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -537,36 +436,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Outer Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_20.q.out ql/src/test/results/clientpositive/smb_mapjoin_20.q.out index 815ab5f..a2bf58d 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_20.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_20.q.out @@ -40,9 +40,6 @@ SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -51,40 +48,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: value - type: string + expressions: key (type: int), value (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToString(_col0) - type: string + key expressions: UDFToString(_col0) (type: string) sort order: + - Map-reduce partition columns: - expr: UDFToString(_col0) - type: string - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + Map-reduce partition columns: UDFToString(_col0) (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -200,9 +183,6 @@ POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchem POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -211,22 +191,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: int - expr: value - type: string + expressions: value (type: string), key (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -383,9 +358,6 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.Fie POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '2')))) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL a) key))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -394,40 +366,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (key + key) - type: int - expr: value - type: string - expr: value - type: string + expressions: (key + key) (type: int), value (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToString(_col0) - type: string + key expressions: UDFToString(_col0) (type: string) sort order: + - Map-reduce partition columns: - expr: UDFToString(_col0) - type: string - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + Map-reduce partition columns: UDFToString(_col0) (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_21.q.out ql/src/test/results/clientpositive/smb_mapjoin_21.q.out index d91f4ae..4ba5a9a 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_21.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_21.q.out @@ -40,9 +40,6 @@ SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -51,20 +48,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -119,9 +113,6 @@ SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -130,36 +121,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: - - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -214,9 +195,6 @@ SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -225,38 +203,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -311,9 +277,6 @@ SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -322,36 +285,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -406,9 +359,6 @@ SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -417,36 +367,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -501,9 +441,6 @@ SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -512,33 +449,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_22.q.out ql/src/test/results/clientpositive/smb_mapjoin_22.q.out index 89abeb9..1c46b84 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_22.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_22.q.out @@ -38,9 +38,6 @@ SELECT * FROM test_table1 POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -49,20 +46,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test_table1 + Map Operator Tree: TableScan alias: test_table1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -226,9 +220,6 @@ POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, ty POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -237,20 +228,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - test_table1 + Map Operator Tree: TableScan alias: test_table1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_25.q.out ql/src/test/results/clientpositive/smb_mapjoin_25.q.out index 15ad7b1..d5187cf 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_25.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_25.q.out @@ -37,9 +37,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) key) 5)))) t1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket_2) c) (TOK_TABREF (TOK_TABNAME smb_bucket_3) d) (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) key))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL c) key) 5)))) t2) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL t2) key) 5)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 @@ -49,42 +46,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1:a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 5) - type: boolean + predicate: (key = 5) (type: boolean) + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - t1:b + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: a + Statistics: Num rows: 52 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 5) - type: boolean + predicate: (key = 5) (type: boolean) + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Join Operator condition map: @@ -92,16 +77,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -109,35 +92,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col0 - type: int - $INTNAME1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - value expressions: - expr: _col0 - type: int + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -145,22 +114,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 129 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col1 = 5) - type: boolean + predicate: (_col1 = 5) (type: boolean) + Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int + expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -168,42 +133,30 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - t2:c + Map Operator Tree: TableScan - alias: c + alias: d + Statistics: Num rows: 55 Data size: 222 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 5) - type: boolean + predicate: (key = 5) (type: boolean) + Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - t2:d + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE TableScan - alias: d + alias: c + Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 5) - type: boolean + predicate: (key = 5) (type: boolean) + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: int + key expressions: key (type: int) sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Reduce Operator Tree: Join Operator condition map: @@ -211,16 +164,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -246,9 +197,6 @@ POSTHOOK: query: -- explain explain select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) key) 5)))) t1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket_2) c) (TOK_TABREF (TOK_TABNAME smb_bucket_3) d) (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) key))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL c) key) 5)))) t2) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL t2) key) 5)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -256,79 +204,55 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 52 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 5) - type: boolean + predicate: (key = 5) (type: boolean) + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col0 - type: int - t2:c + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int) TableScan alias: c + Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 5) - type: boolean + predicate: (key = 5) (type: boolean) + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - value expressions: - expr: _col0 - type: int + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -336,22 +260,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1 Filter Operator - predicate: - expr: (_col1 = 5) - type: boolean + predicate: (_col1 = 5) (type: boolean) Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int + expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_3.q.out ql/src/test/results/clientpositive/smb_mapjoin_3.q.out index af803ad..29b74d9 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_3.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_3.q.out @@ -37,9 +37,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_2 a join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket_2) a) (TOK_TABREF (TOK_TABNAME smb_bucket_3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -47,36 +44,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -104,9 +90,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_2 a left outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_2) a) (TOK_TABREF (TOK_TABNAME smb_bucket_3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -114,36 +97,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -173,9 +145,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_2 a right outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_2) a) (TOK_TABREF (TOK_TABNAME smb_bucket_3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -183,36 +152,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Right Outer Join0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -244,9 +202,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_2 a full outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_2) a) (TOK_TABREF (TOK_TABNAME smb_bucket_3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -254,36 +209,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Outer Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -317,9 +261,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_2 a join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket_2) a) (TOK_TABREF (TOK_TABNAME smb_bucket_3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -327,36 +268,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -384,9 +314,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_2 a left outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_2) a) (TOK_TABREF (TOK_TABNAME smb_bucket_3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -394,36 +321,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -453,9 +369,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_2 a right outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_2) a) (TOK_TABREF (TOK_TABNAME smb_bucket_3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -463,36 +376,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Right Outer Join0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -524,9 +426,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_2 a full outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_2) a) (TOK_TABREF (TOK_TABNAME smb_bucket_3) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -534,36 +433,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Outer Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_4.q.out ql/src/test/results/clientpositive/smb_mapjoin_4.q.out index 08a8fe1..18825ef 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_4.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_4.q.out @@ -37,9 +37,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -47,10 +44,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -59,31 +56,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -111,9 +93,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -121,10 +100,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 @@ -133,31 +112,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -185,9 +149,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -195,10 +156,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 @@ -207,31 +168,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -264,9 +210,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -274,10 +217,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 @@ -286,31 +229,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -344,9 +272,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -354,10 +279,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 @@ -366,31 +291,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -429,9 +339,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -439,10 +346,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Right Outer Join0 to 1 @@ -451,31 +358,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -505,9 +397,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -515,10 +404,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Right Outer Join0 to 1 @@ -527,31 +416,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -583,9 +457,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -593,10 +464,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Right Outer Join0 to 1 @@ -605,31 +476,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -663,9 +519,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -673,10 +526,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Right Outer Join0 to 1 @@ -685,31 +538,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -745,9 +583,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -755,10 +590,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Outer Join 0 to 1 @@ -767,31 +602,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -821,9 +641,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -831,10 +648,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Outer Join 0 to 1 @@ -843,31 +660,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -904,9 +706,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -914,10 +713,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Outer Join 0 to 1 @@ -926,31 +725,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -984,9 +768,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -994,10 +775,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Outer Join 0 to 1 @@ -1006,31 +787,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_5.q.out ql/src/test/results/clientpositive/smb_mapjoin_5.q.out index 80eafe7..f6553b6 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_5.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_5.q.out @@ -37,9 +37,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a c))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -47,10 +44,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -59,31 +56,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -111,9 +93,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a c))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -121,10 +100,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 @@ -133,31 +112,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -185,9 +149,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a c))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -195,10 +156,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 @@ -207,31 +168,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -264,9 +210,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a c))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -274,10 +217,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 @@ -286,31 +229,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -344,9 +272,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a c))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -354,10 +279,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join0 to 1 @@ -366,31 +291,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -429,9 +339,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a c))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -439,10 +346,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Right Outer Join0 to 1 @@ -451,31 +358,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -505,9 +397,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a c))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -515,10 +404,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Right Outer Join0 to 1 @@ -527,31 +416,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -583,9 +457,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a c))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -593,10 +464,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Right Outer Join0 to 1 @@ -605,31 +476,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -663,9 +519,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a c))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -673,10 +526,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Right Outer Join0 to 1 @@ -685,31 +538,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -745,9 +583,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a c))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -755,10 +590,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Outer Join 0 to 1 @@ -767,31 +602,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -821,9 +641,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a c))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -831,10 +648,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Outer Join 0 to 1 @@ -843,31 +660,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -904,9 +706,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a c))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -914,10 +713,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Outer Join 0 to 1 @@ -926,31 +725,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -984,9 +768,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket_3) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a c))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -994,10 +775,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Outer Join 0 to 1 @@ -1006,31 +787,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_6.q.out ql/src/test/results/clientpositive/smb_mapjoin_6.q.out index 5ba11d5..9ac0df1 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_6.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_6.q.out @@ -56,9 +56,6 @@ POSTHOOK: Lineage: smb_bucket4_1.key EXPRESSION [(src)src.FieldSchema(name:key, POSTHOOK: Lineage: smb_bucket4_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smb_bucket4_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smb_bucket4_2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket4_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket4_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME smb_join_results))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -72,36 +69,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -132,12 +118,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -146,12 +130,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1312,9 +1294,6 @@ POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket4_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket4_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME smb_join_results))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 @@ -1328,36 +1307,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1388,12 +1356,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1402,12 +1368,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2668,9 +2632,6 @@ POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket4_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket4_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME smb_join_results))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL a) key) 1000)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -2684,40 +2645,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 1000) - type: boolean + predicate: (key > 1000) (type: boolean) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2748,12 +2697,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2762,12 +2709,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2856,9 +2801,6 @@ POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket4_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket4_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME smb_join_results))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL a) key) 1000)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 @@ -2872,40 +2814,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 1000) - type: boolean + predicate: (key > 1000) (type: boolean) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2936,12 +2866,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2950,12 +2878,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3050,9 +2976,6 @@ POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME smb_bucket4_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket4_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME smb_bucket4_2) c) (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b c))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL a) key) 1000)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -3060,14 +2983,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 1000) - type: boolean + predicate: (key > 1000) (type: boolean) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -3076,31 +2998,16 @@ STAGE PLANS: 0 {key} {value} 1 {key} {value} 2 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - expr: _col8 - type: int - expr: _col9 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col8 (type: int), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_7.q.out ql/src/test/results/clientpositive/smb_mapjoin_7.q.out index 1c06a98..8be4606 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_7.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_7.q.out @@ -623,9 +623,6 @@ POSTHOOK: Lineage: smb_join_results_empty_bigtable.v1 SIMPLE [(smb_bucket4_1)a.F POSTHOOK: Lineage: smb_join_results_empty_bigtable.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: smb_join_results_empty_bigtable.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: smb_join_results_empty_bigtable.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME smb_bucket4_1) a) (TOK_TABREF (TOK_TABNAME smb_bucket4_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME smb_join_results))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -639,36 +636,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Outer Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -699,12 +685,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -713,12 +697,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/sort.q.out ql/src/test/results/clientpositive/sort.q.out index 98c294b..dbabafc 100644 --- ql/src/test/results/clientpositive/sort.q.out +++ ql/src/test/results/clientpositive/sort.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.* FROM SRC x SORT BY key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -14,33 +11,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - x + Map Operator Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out index 92aeb85..d6395cc 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out @@ -54,9 +54,6 @@ POSTHOOK: Lineage: table_desc1.key SIMPLE [(src)src.FieldSchema(name:key, type:s POSTHOOK: Lineage: table_desc1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: table_desc2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: table_desc2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table_desc1) a) (TOK_TABREF (TOK_TABNAME table_desc2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL a) key) 10)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -64,53 +61,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 0 + 0 key (type: string) + 1 key (type: string) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out index 667d8c7..2ca9d8a 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out @@ -62,9 +62,6 @@ POSTHOOK: Lineage: table_desc1.key SIMPLE [(src)src.FieldSchema(name:key, type:s POSTHOOK: Lineage: table_desc1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: table_desc2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: table_desc2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table_desc1) a) (TOK_TABREF (TOK_TABNAME table_desc2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL a) key) 10)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -72,53 +69,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key], Column[value]] - 1 [Column[key], Column[value]] - Position of Big Table: 0 + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out index 6b76fd3..c7f8c26 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out @@ -62,9 +62,6 @@ POSTHOOK: Lineage: table_desc1.key SIMPLE [(src)src.FieldSchema(name:key, type:s POSTHOOK: Lineage: table_desc1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: table_desc2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: table_desc2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table_desc1) a) (TOK_TABREF (TOK_TABNAME table_desc2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL a) key) 10)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -72,53 +69,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key], Column[value]] - 1 [Column[key], Column[value]] - Position of Big Table: 0 + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out index 9a6bb3e..3a1ed8f 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out @@ -60,9 +60,6 @@ POSTHOOK: Lineage: table_desc1.key SIMPLE [(src)src.FieldSchema(name:key, type:s POSTHOOK: Lineage: table_desc1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: table_desc2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: table_desc2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table_desc1) a) (TOK_TABREF (TOK_TABNAME table_desc2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL a) key) 10)))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -79,71 +76,63 @@ STAGE PLANS: b TableScan alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key], Column[value]] - 1 [Column[key], Column[value]] - Position of Big Table: 0 + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key], Column[value]] - 1 [Column[key], Column[value]] - Position of Big Table: 0 + 0 key (type: string), value (type: string) + 1 key (type: string), value (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out index e06ace4..9d4d504 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out @@ -67,7 +67,55 @@ POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '1' + = + . + TOK_TABLE_OR_COL + b + part + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -76,12 +124,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -89,24 +135,19 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) Position of Big Table: 0 Select Operator Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -159,15 +200,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out index 8b444a8..6ee7b0a 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out @@ -67,7 +67,55 @@ POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '1' + = + . + TOK_TABLE_OR_COL + b + part + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -76,12 +124,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -89,33 +135,24 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) Position of Big Table: 0 - Statistics: - numRows: 550 dataSize: 5843 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - Statistics: - numRows: 550 dataSize: 5843 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -168,8 +205,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -230,27 +266,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out index 55d36b2..a0c869a 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out @@ -131,7 +131,55 @@ POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + a + part + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + b + part + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -140,12 +188,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 1000 dataSize: 10624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -153,32 +199,23 @@ STAGE PLANS: condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) Position of Big Table: 0 - Statistics: - numRows: 1100 dataSize: 11686 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: - numRows: 1100 dataSize: 11686 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: bigint + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Alias -> Map Local Tables: @@ -272,8 +309,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 1000 dataSize: 10624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Path -> Alias: #### A masked pattern was here #### @@ -370,27 +406,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/source.q.out ql/src/test/results/clientpositive/source.q.out index 8ec8714..b01c8ca 100644 --- ql/src/test/results/clientpositive/source.q.out +++ ql/src/test/results/clientpositive/source.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x.* FROM SRC x POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME x)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -17,13 +14,11 @@ STAGE PLANS: Processor Tree: TableScan alias: x + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT x.* FROM SRC x diff --git ql/src/test/results/clientpositive/stats0.q.out ql/src/test/results/clientpositive/stats0.q.out index 9530385..15b270f 100644 --- ql/src/test/results/clientpositive/stats0.q.out +++ ql/src/test/results/clientpositive/stats0.q.out @@ -12,7 +12,21 @@ insert overwrite table stats_non_partitioned select * from src POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME stats_non_partitioned))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + stats_non_partitioned + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -22,29 +36,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -686,9 +692,6 @@ select * from src POSTHOOK: type: QUERY POSTHOOK: Lineage: stats_non_partitioned.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_non_partitioned.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME stats_partitioned) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -697,20 +700,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1351,7 +1351,21 @@ POSTHOOK: Lineage: stats_non_partitioned.value SIMPLE [(src)src.FieldSchema(name POSTHOOK: Lineage: stats_partitioned PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_partitioned PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME stats_non_partitioned))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + stats_non_partitioned + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1366,29 +1380,21 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1492,8 +1498,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -1559,8 +1564,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -2199,9 +2203,6 @@ POSTHOOK: Lineage: stats_non_partitioned.value SIMPLE [(src)src.FieldSchema(name POSTHOOK: Lineage: stats_non_partitioned.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: stats_partitioned PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_partitioned PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME stats_partitioned) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -2215,20 +2216,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2261,12 +2259,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2275,12 +2271,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/stats1.q.out ql/src/test/results/clientpositive/stats1.q.out index 73d870c..690a53d 100644 --- ql/src/test/results/clientpositive/stats1.q.out +++ ql/src/test/results/clientpositive/stats1.q.out @@ -17,9 +17,6 @@ FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1 UNION ALL SELECT s2.key AS key, s2.value AS value FROM src1 s2) unionsrc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_FUNCTION count 1)) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME tmptable))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -29,40 +26,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:unionsrc-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst1' - type: string - expr: UDFToString(_col0) - type: string + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -70,46 +60,38 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 2 Data size: 488 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - null-subquery2:unionsrc-subquery2:s2 TableScan alias: s2 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 2 Data size: 488 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/stats10.q.out ql/src/test/results/clientpositive/stats10.q.out index b0b51ff..86ad524 100644 --- ql/src/test/results/clientpositive/stats10.q.out +++ ql/src/test/results/clientpositive/stats10.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: explain insert overwrite table bucket3_1 partition (ds='1') select * from src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucket3_1) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,40 +19,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Map-reduce partition columns: - expr: UDFToInteger(_col0) - type: int - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -392,9 +378,6 @@ POSTHOOK: Lineage: bucket3_1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchem POSTHOOK: Lineage: bucket3_1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: bucket3_1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bucket3_1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME bucket3_1) (TOK_PARTSPEC (TOK_PARTVAL ds)))) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 depends on stages: Stage-0 @@ -402,10 +385,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - bucket3_1 + Map Operator Tree: TableScan alias: bucket3_1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-1 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/stats11.q.out ql/src/test/results/clientpositive/stats11.q.out index 8d18093..5e65bc5 100644 --- ql/src/test/results/clientpositive/stats11.q.out +++ ql/src/test/results/clientpositive/stats11.q.out @@ -26,9 +26,6 @@ PREHOOK: type: LOAD POSTHOOK: query: explain load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') POSTHOOK: type: LOAD -ABSTRACT SYNTAX TREE: - (TOK_LOAD '../../data/files/srcbucket20.txt' (TOK_TAB (TOK_TABNAME srcbucket_mapjoin_part) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08'))) local) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 depends on stages: Stage-0 @@ -281,7 +278,61 @@ from srcbucket_mapjoin a join srcbucket_mapjoin_part b on a.key=b.key where b.ds="2008-04-08" POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -296,12 +347,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a - Statistics: - numRows: 26 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -309,33 +358,23 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col5 Position of Big Table: 0 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -407,8 +446,7 @@ STAGE PLANS: b TableScan alias: b - Statistics: - numRows: 55 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -503,8 +541,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -570,8 +607,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -809,7 +845,61 @@ POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a. POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -824,12 +914,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b - Statistics: - numRows: 55 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Map Join Operator condition map: @@ -837,33 +925,23 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col5 Position of Big Table: 1 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 60 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -898,8 +976,7 @@ STAGE PLANS: a TableScan alias: a - Statistics: - numRows: 26 dataSize: 2750 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Bucket Mapjoin Context: Alias Bucket Base File Name Mapping: @@ -998,8 +1075,7 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator @@ -1080,8 +1156,7 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false File Output Operator diff --git ql/src/test/results/clientpositive/stats12.q.out ql/src/test/results/clientpositive/stats12.q.out index ebe721f..923412e 100644 --- ql/src/test/results/clientpositive/stats12.q.out +++ ql/src/test/results/clientpositive/stats12.q.out @@ -45,7 +45,18 @@ POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [ POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME analyze_srcpart) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr)))) + +TOK_ANALYZE + TOK_TAB + TOK_TABNAME + analyze_srcpart + TOK_PARTSPEC + TOK_PARTVAL + ds + '2008-04-08' + TOK_PARTVAL + hr + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -54,12 +65,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - analyze_srcpart + Map Operator Tree: TableScan alias: analyze_srcpart - Statistics: - numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Statistics Aggregation Key Prefix: default.analyze_srcpart/ GatherStats: true Path -> Alias: diff --git ql/src/test/results/clientpositive/stats13.q.out ql/src/test/results/clientpositive/stats13.q.out index cd23f6a..bd997cb 100644 --- ql/src/test/results/clientpositive/stats13.q.out +++ ql/src/test/results/clientpositive/stats13.q.out @@ -45,7 +45,19 @@ POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [ POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME analyze_srcpart) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr 11)))) + +TOK_ANALYZE + TOK_TAB + TOK_TABNAME + analyze_srcpart + TOK_PARTSPEC + TOK_PARTVAL + ds + '2008-04-08' + TOK_PARTVAL + hr + 11 + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -54,12 +66,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - analyze_srcpart + Map Operator Tree: TableScan alias: analyze_srcpart - Statistics: - numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Statistics Aggregation Key Prefix: default.analyze_srcpart/ GatherStats: true Path -> Alias: diff --git ql/src/test/results/clientpositive/stats2.q.out ql/src/test/results/clientpositive/stats2.q.out index 16282e4..761f5f0 100644 --- ql/src/test/results/clientpositive/stats2.q.out +++ ql/src/test/results/clientpositive/stats2.q.out @@ -9,9 +9,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table analyze_t1 partition (ds, hr) select * from srcpart where ds is not null POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME analyze_t1) (TOK_PARTSPEC (TOK_PARTVAL ds) (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL ds))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -19,24 +16,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -138,9 +128,6 @@ POSTHOOK: Lineage: analyze_t1 PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpar POSTHOOK: Lineage: analyze_t1 PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: analyze_t1 PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_t1 PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME analyze_t1) (TOK_PARTSPEC (TOK_PARTVAL ds) (TOK_PARTVAL hr)))) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 depends on stages: Stage-0 @@ -148,10 +135,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - analyze_t1 + Map Operator Tree: TableScan alias: analyze_t1 + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Stage: Stage-1 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/stats3.q.out ql/src/test/results/clientpositive/stats3.q.out index d62f942..83ca1a3 100644 --- ql/src/test/results/clientpositive/stats3.q.out +++ ql/src/test/results/clientpositive/stats3.q.out @@ -18,7 +18,15 @@ POSTHOOK: query: explain extended load data local inpath '../../data/files/test.dat' overwrite into table hive_test_src POSTHOOK: type: LOAD ABSTRACT SYNTAX TREE: - (TOK_LOAD '../../data/files/test.dat' (TOK_TAB (TOK_TABNAME hive_test_src)) local overwrite) + +TOK_LOAD + '../../data/files/test.dat' + TOK_TAB + TOK_TABNAME + hive_test_src + local + overwrite + STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/stats4.q.out ql/src/test/results/clientpositive/stats4.q.out index 0927a27..3e9e522 100644 --- ql/src/test/results/clientpositive/stats4.q.out +++ ql/src/test/results/clientpositive/stats4.q.out @@ -34,9 +34,6 @@ from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' insert overwrite table nzhang_part2 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part1) (TOK_PARTSPEC (TOK_PARTVAL ds) (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (<= (TOK_TABLE_OR_COL ds) '2008-04-08'))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME nzhang_part2) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-12-31') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL hr))) (TOK_WHERE (> (TOK_TABLE_OR_COL ds) '2008-04-08')))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 @@ -57,49 +54,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (ds <= '2008-04-08') - type: boolean + predicate: (ds <= '2008-04-08') (type: boolean) + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 Filter Operator - predicate: - expr: (ds > '2008-04-08') - type: boolean + predicate: (ds > '2008-04-08') (type: boolean) + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 38 Data size: 7615 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -133,12 +116,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -147,12 +128,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -192,12 +171,10 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -206,12 +183,10 @@ STAGE PLANS: Stage: Stage-12 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/stats5.q.out ql/src/test/results/clientpositive/stats5.q.out index bf5ab3c..14de4f8 100644 --- ql/src/test/results/clientpositive/stats5.q.out +++ ql/src/test/results/clientpositive/stats5.q.out @@ -9,9 +9,6 @@ PREHOOK: query: explain analyze table analyze_src compute statistics PREHOOK: type: QUERY POSTHOOK: query: explain analyze table analyze_src compute statistics POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME analyze_src))) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 depends on stages: Stage-0 @@ -19,10 +16,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - analyze_src + Map Operator Tree: TableScan alias: analyze_src + Statistics: Num rows: -1 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Stage: Stage-1 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/stats7.q.out ql/src/test/results/clientpositive/stats7.q.out index 995747b..81a25f4 100644 --- ql/src/test/results/clientpositive/stats7.q.out +++ ql/src/test/results/clientpositive/stats7.q.out @@ -42,9 +42,6 @@ POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(s POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME analyze_srcpart) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr)))) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 depends on stages: Stage-0 @@ -52,10 +49,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - analyze_srcpart + Map Operator Tree: TableScan alias: analyze_srcpart + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Stage: Stage-1 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/stats8.q.out ql/src/test/results/clientpositive/stats8.q.out index 5011411..5cd4ef5 100644 --- ql/src/test/results/clientpositive/stats8.q.out +++ ql/src/test/results/clientpositive/stats8.q.out @@ -42,9 +42,6 @@ POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(s POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME analyze_srcpart) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr 11)))) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 depends on stages: Stage-0 @@ -52,10 +49,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - analyze_srcpart + Map Operator Tree: TableScan alias: analyze_srcpart + Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE Stage: Stage-1 Stats-Aggr Operator @@ -183,9 +180,6 @@ POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(s POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME analyze_srcpart) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr 12)))) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 depends on stages: Stage-0 @@ -193,10 +187,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - analyze_srcpart + Map Operator Tree: TableScan alias: analyze_srcpart + Statistics: Num rows: 500 Data size: 5312 Basic stats: PARTIAL Column stats: COMPLETE Stage: Stage-1 Stats-Aggr Operator @@ -281,9 +275,6 @@ POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(s POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME analyze_srcpart) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-09') (TOK_PARTVAL hr 11)))) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 depends on stages: Stage-0 @@ -291,10 +282,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - analyze_srcpart + Map Operator Tree: TableScan alias: analyze_srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: PARTIAL Column stats: COMPLETE Stage: Stage-1 Stats-Aggr Operator @@ -379,9 +370,6 @@ POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(s POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME analyze_srcpart) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-09') (TOK_PARTVAL hr 12)))) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 depends on stages: Stage-0 @@ -389,10 +377,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - analyze_srcpart + Map Operator Tree: TableScan alias: analyze_srcpart + Statistics: Num rows: 1500 Data size: 15936 Basic stats: PARTIAL Column stats: COMPLETE Stage: Stage-1 Stats-Aggr Operator @@ -477,9 +465,6 @@ POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(s POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME analyze_srcpart) (TOK_PARTSPEC (TOK_PARTVAL ds) (TOK_PARTVAL hr)))) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 depends on stages: Stage-0 @@ -487,10 +472,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - analyze_srcpart + Map Operator Tree: TableScan alias: analyze_srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-1 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/stats9.q.out ql/src/test/results/clientpositive/stats9.q.out index ca5af04..51a29ee 100644 --- ql/src/test/results/clientpositive/stats9.q.out +++ ql/src/test/results/clientpositive/stats9.q.out @@ -19,9 +19,6 @@ POSTHOOK: query: explain analyze table analyze_srcbucket compute statistics POSTHOOK: type: QUERY POSTHOOK: Lineage: analyze_srcbucket.key SIMPLE [(srcbucket)srcbucket.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: analyze_srcbucket.value SIMPLE [(srcbucket)srcbucket.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME analyze_srcbucket))) - STAGE DEPENDENCIES: Stage-0 is a root stage Stage-1 depends on stages: Stage-0 @@ -29,10 +26,10 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Map Reduce - Alias -> Map Operator Tree: - analyze_srcbucket + Map Operator Tree: TableScan alias: analyze_srcbucket + Statistics: Num rows: -1 Data size: 11603 Basic stats: PARTIAL Column stats: COMPLETE Stage: Stage-1 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out index 2f66026..a5caf2e 100644 --- ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out +++ ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out @@ -15,9 +15,6 @@ PREHOOK: query: explain insert overwrite table tmptable partition (part) select PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table tmptable partition (part) select key, value from src where key = 'no_such_value' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME tmptable) (TOK_PARTSPEC (TOK_PARTVAL part)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 'no_such_value')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -31,24 +28,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 'no_such_value') - type: boolean + predicate: (key = 'no_such_value') (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -81,12 +74,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -95,12 +86,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/stats_noscan_1.q.out ql/src/test/results/clientpositive/stats_noscan_1.q.out index 615a97f..f92ded8 100644 --- ql/src/test/results/clientpositive/stats_noscan_1.q.out +++ ql/src/test/results/clientpositive/stats_noscan_1.q.out @@ -50,9 +50,6 @@ POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(s POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME analyze_srcpart) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr 11))) noscan) - STAGE DEPENDENCIES: Stage-1 is a root stage @@ -425,9 +422,6 @@ POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).key SI POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME analyze_srcpart_partial) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08'))) noscan) - STAGE DEPENDENCIES: Stage-1 is a root stage diff --git ql/src/test/results/clientpositive/stats_only_null.q.out ql/src/test/results/clientpositive/stats_only_null.q.out index 63e70c7..1023f5e 100644 --- ql/src/test/results/clientpositive/stats_only_null.q.out +++ ql/src/test/results/clientpositive/stats_only_null.q.out @@ -85,9 +85,6 @@ POSTHOOK: Lineage: stats_null_part PARTITION(dt=2011).a SIMPLE [(temps_null)temp POSTHOOK: Lineage: stats_null_part PARTITION(dt=2011).b SIMPLE [(temps_null)temps_null.FieldSchema(name:b, type:int, comment:null), ] POSTHOOK: Lineage: stats_null_part PARTITION(dt=2011).c SIMPLE [(temps_null)temps_null.FieldSchema(name:c, type:string, comment:null), ] POSTHOOK: Lineage: stats_null_part PARTITION(dt=2011).d SIMPLE [(temps_null)temps_null.FieldSchema(name:d, type:smallint, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_null))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL a))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL d)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -95,72 +92,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - stats_null + Map Operator Tree: TableScan alias: stats_null + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: a - type: double - expr: b - type: int - expr: c - type: string - expr: d - type: smallint + expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) outputColumnNames: a, b, c, d + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - expr: count(a) - expr: count(b) - expr: count(c) - expr: count(d) - bucketGroup: false + aggregations: count(), count(a), count(b), count(c), count(d) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: bigint + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: count(VALUE._col1) - expr: count(VALUE._col2) - expr: count(VALUE._col3) - expr: count(VALUE._col4) - bucketGroup: false + aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -188,9 +149,6 @@ POSTHOOK: Lineage: stats_null_part PARTITION(dt=2011).a SIMPLE [(temps_null)temp POSTHOOK: Lineage: stats_null_part PARTITION(dt=2011).b SIMPLE [(temps_null)temps_null.FieldSchema(name:b, type:int, comment:null), ] POSTHOOK: Lineage: stats_null_part PARTITION(dt=2011).c SIMPLE [(temps_null)temps_null.FieldSchema(name:c, type:string, comment:null), ] POSTHOOK: Lineage: stats_null_part PARTITION(dt=2011).d SIMPLE [(temps_null)temps_null.FieldSchema(name:d, type:smallint, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_null_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL a))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL d)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -198,72 +156,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - stats_null_part + Map Operator Tree: TableScan alias: stats_null_part + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: a - type: double - expr: b - type: int - expr: c - type: string - expr: d - type: smallint + expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) outputColumnNames: a, b, c, d + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - expr: count(a) - expr: count(b) - expr: count(c) - expr: count(d) - bucketGroup: false + aggregations: count(), count(a), count(b), count(c), count(d) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: bigint + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: count(VALUE._col1) - expr: count(VALUE._col2) - expr: count(VALUE._col3) - expr: count(VALUE._col4) - bucketGroup: false + aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: bigint - expr: _col3 - type: bigint - expr: _col4 - type: bigint + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -461,9 +383,6 @@ POSTHOOK: Lineage: stats_null_part PARTITION(dt=2011).a SIMPLE [(temps_null)temp POSTHOOK: Lineage: stats_null_part PARTITION(dt=2011).b SIMPLE [(temps_null)temps_null.FieldSchema(name:b, type:int, comment:null), ] POSTHOOK: Lineage: stats_null_part PARTITION(dt=2011).c SIMPLE [(temps_null)temps_null.FieldSchema(name:c, type:string, comment:null), ] POSTHOOK: Lineage: stats_null_part PARTITION(dt=2011).d SIMPLE [(temps_null)temps_null.FieldSchema(name:d, type:smallint, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_null))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL a))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL d)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -490,9 +409,6 @@ POSTHOOK: Lineage: stats_null_part PARTITION(dt=2011).a SIMPLE [(temps_null)temp POSTHOOK: Lineage: stats_null_part PARTITION(dt=2011).b SIMPLE [(temps_null)temps_null.FieldSchema(name:b, type:int, comment:null), ] POSTHOOK: Lineage: stats_null_part PARTITION(dt=2011).c SIMPLE [(temps_null)temps_null.FieldSchema(name:c, type:string, comment:null), ] POSTHOOK: Lineage: stats_null_part PARTITION(dt=2011).d SIMPLE [(temps_null)temps_null.FieldSchema(name:d, type:smallint, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_null_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL a))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL d)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/stats_partscan_1.q.out ql/src/test/results/clientpositive/stats_partscan_1.q.out index d39e8b7..b3cfb61 100644 --- ql/src/test/results/clientpositive/stats_partscan_1.q.out +++ ql/src/test/results/clientpositive/stats_partscan_1.q.out @@ -114,9 +114,6 @@ POSTHOOK: Lineage: analyze_srcpart_partial_scan PARTITION(ds=2008-04-09,hr=11).k POSTHOOK: Lineage: analyze_srcpart_partial_scan PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart_partial_scan PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart_partial_scan PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_ANALYZE (TOK_TAB (TOK_TABNAME analyze_srcpart_partial_scan) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr 11))) partialscan) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 diff --git ql/src/test/results/clientpositive/str_to_map.q.out ql/src/test/results/clientpositive/str_to_map.q.out index 256f5c1..3cce199 100644 --- ql/src/test/results/clientpositive/str_to_map.q.out +++ ql/src/test/results/clientpositive/str_to_map.q.out @@ -13,9 +13,6 @@ PREHOOK: query: explain select str_to_map('a=1,b=2,c=3',',','=')['a'] from src l PREHOOK: type: QUERY POSTHOOK: query: explain select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR ([ (TOK_FUNCTION str_to_map 'a=1,b=2,c=3' ',' '=') 'a'))) (TOK_LIMIT 3))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -26,12 +23,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: str_to_map('a=1,b=2,c=3',',','=')['a'] - type: string + expressions: str_to_map('a=1,b=2,c=3',',','=')['a'] (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 @@ -49,9 +48,6 @@ PREHOOK: query: explain select str_to_map('a:1,b:2,c:3') from src limit 3 PREHOOK: type: QUERY POSTHOOK: query: explain select str_to_map('a:1,b:2,c:3') from src limit 3 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION str_to_map 'a:1,b:2,c:3'))) (TOK_LIMIT 3))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -62,12 +58,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: str_to_map('a:1,b:2,c:3') - type: map + expressions: str_to_map('a:1,b:2,c:3') (type: map) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: select str_to_map('a:1,b:2,c:3') from src limit 3 @@ -85,9 +83,6 @@ PREHOOK: query: explain select str_to_map('a:1,b:2,c:3',',',':') from src limit PREHOOK: type: QUERY POSTHOOK: query: explain select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION str_to_map 'a:1,b:2,c:3' ',' ':'))) (TOK_LIMIT 3))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -98,12 +93,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: str_to_map('a:1,b:2,c:3',',',':') - type: map + expressions: str_to_map('a:1,b:2,c:3',',',':') (type: map) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 @@ -125,9 +122,6 @@ POSTHOOK: query: explain select str_to_map(t.ss,',',':')['a'] from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t limit 3 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST 'a:1,b:2,c:3') TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST ss)))))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR ([ (TOK_FUNCTION str_to_map (. (TOK_TABLE_OR_COL t) ss) ',' ':') 'a'))) (TOK_LIMIT 3))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -135,30 +129,31 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: 'a:1,b:2,c:3' - type: string + expressions: 'a:1,b:2,c:3' (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: str_to_map(_col0,',',':')['a'] - type: string + expressions: str_to_map(_col0,',',':')['a'] (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/subq.q.out ql/src/test/results/clientpositive/subq.q.out index adcf113..15f871b 100644 --- ql/src/test/results/clientpositive/subq.q.out +++ ql/src/test/results/clientpositive/subq.q.out @@ -10,9 +10,6 @@ FROM ( ) unioninput INSERT OVERWRITE DIRECTORY 'target/warehouse/union.out' SELECT unioninput.* POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL src) key) 100)))) unioninput)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR 'target/warehouse/union.out')) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME unioninput)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-6 depends on stages: Stage-1 , consists of Stage-3, Stage-2, Stage-4 @@ -25,24 +22,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - unioninput:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -65,12 +58,10 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -78,12 +69,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/subq2.q.out ql/src/test/results/clientpositive/subq2.q.out index bfd88ab..90ee186 100644 --- ql/src/test/results/clientpositive/subq2.q.out +++ ql/src/test/results/clientpositive/subq2.q.out @@ -8,9 +8,6 @@ SELECT a.k, a.c FROM (SELECT b.key as k, count(1) as c FROM src b GROUP BY b.key) a WHERE a.k >= 90 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) k) (TOK_SELEXPR (TOK_FUNCTION count 1) c)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) k)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) c))) (TOK_WHERE (>= (. (TOK_TABLE_OR_COL a) k) 90)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -18,60 +15,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key >= 90) - type: boolean + predicate: (key >= 90) (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/subq_where_serialization.q.out ql/src/test/results/clientpositive/subq_where_serialization.q.out index 6c4b4ad..9c26adf 100644 --- ql/src/test/results/clientpositive/subq_where_serialization.q.out +++ ql/src/test/results/clientpositive/subq_where_serialization.q.out @@ -2,9 +2,6 @@ PREHOOK: query: explain select src.key from src where src.key in ( select distin PREHOOK: type: QUERY POSTHOOK: query: explain select src.key from src where src.key in ( select distinct key from src) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key))) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) (. (TOK_TABLE_OR_COL src) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-2 @@ -15,54 +12,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - sq_1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string + keys: key (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -73,8 +57,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src Map Join Operator @@ -83,24 +66,17 @@ STAGE PLANS: condition expressions: 0 {key} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[_col0]] + 0 key (type: string) + 1 _col0 (type: string) outputColumnNames: _col0 - Position of Big Table: 0 Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -117,33 +93,22 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -151,20 +116,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/subquery_exists.q.out ql/src/test/results/clientpositive/subquery_exists.q.out index f55afd8..4265107 100644 --- ql/src/test/results/clientpositive/subquery_exists.q.out +++ ql/src/test/results/clientpositive/subquery_exists.q.out @@ -18,9 +18,6 @@ where exists where b.value = a.value and a.key = b.key and a.value > 'val_9' ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP exists) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL a) value)) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (> (. (TOK_TABLE_OR_COL a) value) 'val_9'))))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -28,64 +25,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string - expr: key - type: string + key expressions: value (type: string), key (type: string) sort order: ++ - Map-reduce partition columns: - expr: value - type: string - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - sq_1:a + Map-reduce partition columns: value (type: string), key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (value > 'val_9') - type: boolean + predicate: (value > 'val_9') (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: _col1, _col2 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col1 - type: string - expr: _col2 - type: string + keys: _col1 (type: string), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -93,22 +62,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/subquery_exists_having.q.out ql/src/test/results/clientpositive/subquery_exists_having.q.out index 7266a6b..12b0bf4 100644 --- ql/src/test/results/clientpositive/subquery_exists_having.q.out +++ ql/src/test/results/clientpositive/subquery_exists_having.q.out @@ -20,9 +20,6 @@ having exists where a.key = b.key and a.value > 'val_9' ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key)) (TOK_HAVING (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP exists) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (> (. (TOK_TABLE_OR_COL a) value) 'val_9'))))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -31,49 +28,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -81,51 +64,34 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - sq_1:a + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (value > 'val_9') - type: boolean + predicate: (value > 'val_9') (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col1 - type: string + keys: _col1 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -133,22 +99,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/subquery_in.q.out ql/src/test/results/clientpositive/subquery_in.q.out index bf7a482..48be22b 100644 --- ql/src/test/results/clientpositive/subquery_in.q.out +++ ql/src/test/results/clientpositive/subquery_in.q.out @@ -96,9 +96,6 @@ explain from src where src.key in (select key from src s1 where s1.key > '9') POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL s1) key) '9')))) (. (TOK_TABLE_OR_COL src) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -106,52 +103,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - sq_1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '9') - type: boolean + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -159,22 +140,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -227,9 +204,6 @@ where b.key in where b.value = a.value and a.key > '9' ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL a) value)) (> (. (TOK_TABLE_OR_COL a) key) '9'))))) (. (TOK_TABLE_OR_COL b) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -237,64 +211,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - sq_1:a + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '9') - type: boolean + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -302,22 +248,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -378,9 +320,6 @@ part where part.p_size in where r <= 2 ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_name)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size))) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_size)) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL p_size)))))) r)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL p_size)))) (TOK_WHERE (<= (TOK_TABLE_OR_COL r) 2)))) (. (TOK_TABLE_OR_COL part) p_size))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -390,47 +329,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - sq_1:a:part + Map Operator Tree: TableScan alias: part + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ - Map-reduce partition columns: - expr: p_mfgr - type: string - tag: -1 - value expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_size (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE PTF Operator + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_wcol0 <= 2) - type: boolean + predicate: (_wcol0 <= 2) (type: boolean) + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col5 - type: int + expressions: _col5 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: avg(_col0) - bucketGroup: false + aggregations: avg(_col0) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -438,37 +365,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: avg(VALUE._col0) - bucketGroup: false + aggregations: avg(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double + expressions: _col0 (type: double) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: double + keys: _col0 (type: double) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -476,35 +395,22 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - Map-reduce partition columns: - expr: _col0 - type: double - tag: 1 - part + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE TableScan alias: part + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: UDFToDouble(p_size) - type: double + key expressions: UDFToDouble(p_size) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(p_size) - type: double - tag: 0 - value expressions: - expr: p_name - type: string - expr: p_size - type: int + Map-reduce partition columns: UDFToDouble(p_size) (type: double) + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_size (type: int) Reduce Operator Tree: Join Operator condition map: @@ -512,22 +418,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} {VALUE._col5} 1 - handleSkewJoin: false outputColumnNames: _col1, _col5 + Statistics: Num rows: 33 Data size: 3490 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 16 Data size: 1692 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col5 - type: int + expressions: _col1 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1692 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 16 Data size: 1692 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -577,9 +479,6 @@ from part b where b.p_size in where r <= 2 and b.p_mfgr = a.p_mfgr ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_name)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size))) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size)) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL p_size)))))) r)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL p_size)))) (TOK_WHERE (and (<= (TOK_TABLE_OR_COL r) 2) (= (. (TOK_TABLE_OR_COL b) p_mfgr) (. (TOK_TABLE_OR_COL a) p_mfgr)))))) (. (TOK_TABLE_OR_COL b) p_size))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -589,52 +488,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - sq_1:a:part + Map Operator Tree: TableScan alias: part + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ - Map-reduce partition columns: - expr: p_mfgr - type: string - tag: -1 - value expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_size (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE PTF Operator + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_wcol0 <= 2) - type: boolean + predicate: (_wcol0 <= 2) (type: boolean) + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col5 - type: int + expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: min(_col1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: min(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -642,50 +525,32 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: int + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Reduce Operator Tree: Group By Operator - aggregations: - expr: min(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: int - expr: _col0 - type: string + expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: int - expr: _col1 - type: string + keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -693,45 +558,22 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: p_size (type: int), p_mfgr (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: string - tag: 1 - b + Map-reduce partition columns: p_size (type: int), p_mfgr (type: string) + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) TableScan - alias: b Reduce Output Operator - key expressions: - expr: p_size - type: int - expr: p_mfgr - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: p_size - type: int - expr: p_mfgr - type: string - tag: 0 - value expressions: - expr: p_name - type: string - expr: p_mfgr - type: string - expr: p_size - type: int + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -739,24 +581,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} {VALUE._col2} {VALUE._col5} 1 - handleSkewJoin: false outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 8 Data size: 1745 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col1 - type: string - expr: _col5 - type: int + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 1745 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 8 Data size: 1745 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -810,9 +646,6 @@ where b.key in where b.value = a.value and a.key > '9' ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL a) value)) (> (. (TOK_TABLE_OR_COL a) key) '9'))))) (. (TOK_TABLE_OR_COL b) key))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -821,72 +654,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - sq_1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '9') - type: boolean + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -894,43 +699,22 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: 1 - b + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -938,22 +722,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1030,9 +810,6 @@ from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li o where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME lineitem))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL l_partkey) p_partkey)))) p) (TOK_TABREF (TOK_TABNAME lineitem) li) (= (. (TOK_TABLE_OR_COL p) p_partkey) (. (TOK_TABLE_OR_COL li) l_partkey)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL p) p_partkey)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL li) l_suppkey))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL li) l_linenumber) 1) (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME lineitem))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL l_orderkey))) (TOK_WHERE (= (TOK_TABLE_OR_COL l_shipmode) 'AIR')))) (. (TOK_TABLE_OR_COL li) l_orderkey)))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 @@ -1042,47 +819,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - p:lineitem + Map Operator Tree: TableScan alias: lineitem + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: l_partkey - type: int + expressions: l_partkey (type: int) outputColumnNames: l_partkey + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: l_partkey - type: int + keys: l_partkey (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: int + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1090,42 +856,26 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME - TableScan - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col0 - type: int - li + Map Operator Tree: TableScan alias: li + Statistics: Num rows: 756 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (l_linenumber = 1) - type: boolean + predicate: (l_linenumber = 1) (type: boolean) + Statistics: Num rows: 378 Data size: 6049 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: l_partkey - type: int + key expressions: l_partkey (type: int) sort order: + - Map-reduce partition columns: - expr: l_partkey - type: int - tag: 1 - value expressions: - expr: l_orderkey - type: int - expr: l_suppkey - type: int + Map-reduce partition columns: l_partkey (type: int) + Statistics: Num rows: 378 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + value expressions: l_orderkey (type: int), l_suppkey (type: int) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -1133,11 +883,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1663 Data size: 6653 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1145,51 +894,34 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: int + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: - expr: _col1 - type: int - tag: 0 - value expressions: - expr: _col3 - type: int - expr: _col0 - type: int - sq_1:lineitem + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1663 Data size: 6653 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col0 (type: int) TableScan alias: lineitem + Statistics: Num rows: 116 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (l_shipmode = 'AIR') - type: boolean + predicate: (l_shipmode = 'AIR') (type: boolean) + Statistics: Num rows: 58 Data size: 6049 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: l_orderkey - type: int + expressions: l_orderkey (type: int) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 6049 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: int + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 6049 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 58 Data size: 6049 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1197,18 +929,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col2} {VALUE._col18} 1 - handleSkewJoin: false outputColumnNames: _col2, _col18 + Statistics: Num rows: 1829 Data size: 7318 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col18 - type: int - expr: _col2 - type: int + expressions: _col18 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1829 Data size: 7318 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1829 Data size: 7318 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/subquery_in_having.q.out ql/src/test/results/clientpositive/subquery_in_having.q.out index a351f2c..ef3dc18 100644 --- ql/src/test/results/clientpositive/subquery_in_having.q.out +++ ql/src/test/results/clientpositive/subquery_in_having.q.out @@ -45,9 +45,6 @@ from src group by key having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_HAVING (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL s1) key) '9')) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s1) key)))) (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -57,65 +54,47 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - sq_1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '9') - type: boolean + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: bigint + expressions: _col1 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: bigint + keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -123,34 +102,20 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: bigint + key expressions: _col0 (type: bigint) sort order: + - Map-reduce partition columns: - expr: _col0 - type: bigint - tag: 1 - $INTNAME1 + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: bigint + key expressions: _col1 (type: bigint) sort order: + - Map-reduce partition columns: - expr: _col1 - type: bigint - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -158,22 +123,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 3145 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -181,49 +142,35 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -306,9 +253,6 @@ from src b group by key, value having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value = b.value group by s1.key ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)) (TOK_HAVING (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (and (> (. (TOK_TABLE_OR_COL s1) key) '9') (= (. (TOK_TABLE_OR_COL s1) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s1) key)))) (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -318,59 +262,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + aggregations: count() + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -378,44 +298,20 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col2 - type: bigint - expr: _col1 - type: string + key expressions: _col2 (type: bigint), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col2 - type: bigint - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col2 (type: bigint), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: bigint - expr: _col1 - type: string + key expressions: _col0 (type: bigint), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: bigint - expr: _col1 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -423,24 +319,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -448,79 +338,47 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - sq_1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '9') - type: boolean + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: value, key + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: value - type: string - expr: key - type: string + aggregations: count() + keys: value (type: string), key (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: bigint - expr: _col0 - type: string + expressions: _col2 (type: bigint), _col0 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: bigint - expr: _col1 - type: string + keys: _col0 (type: bigint), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -554,9 +412,6 @@ having b.p_mfgr in having max(p_size) - min(p_size) < 20 ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL p_size)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) p_mfgr)) (TOK_HAVING (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr))) (TOK_GROUPBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_HAVING (< (- (TOK_FUNCTION max (TOK_TABLE_OR_COL p_size)) (TOK_FUNCTION min (TOK_TABLE_OR_COL p_size))) 20)))) (. (TOK_TABLE_OR_COL b) p_mfgr))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -566,51 +421,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + expressions: p_mfgr (type: string), p_size (type: int) outputColumnNames: p_mfgr, p_size + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: avg(p_size) - bucketGroup: false - keys: - expr: p_mfgr - type: string + aggregations: avg(p_size) + keys: p_mfgr (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: struct + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: avg(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -618,34 +457,20 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: double - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -653,22 +478,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 33 Data size: 3490 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 16 Data size: 1692 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1692 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 16 Data size: 1692 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -676,71 +497,47 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - sq_1:part + Map Operator Tree: TableScan alias: part + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + expressions: p_mfgr (type: string), p_size (type: int) outputColumnNames: p_mfgr, p_size + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: max(p_size) - expr: min(p_size) - bucketGroup: false - keys: - expr: p_mfgr - type: string + aggregations: max(p_size), min(p_size) + keys: p_mfgr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: int - expr: _col2 - type: int + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - expr: min(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: max(VALUE._col0), min(VALUE._col1) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((_col1 - _col2) < 20) - type: boolean + predicate: ((_col1 - _col2) < 20) (type: boolean) + Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -804,9 +601,6 @@ where b.key in (select key from src where src.key > '8') group by key, value having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) '8')))) (. (TOK_TABLE_OR_COL b) key))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)) (TOK_HAVING (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL s1) key) '9')) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s1) key)))) (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -817,52 +611,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - sq_1:src + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '8') - type: boolean + predicate: (key > '8') (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -870,33 +648,23 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -904,40 +672,23 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -945,36 +696,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col2 - type: bigint + key expressions: _col2 (type: bigint) sort order: + - Map-reduce partition columns: - expr: _col2 - type: bigint - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col2 (type: bigint) + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: bigint + key expressions: _col0 (type: bigint) sort order: + - Map-reduce partition columns: - expr: _col0 - type: bigint - tag: 1 + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -982,24 +717,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 16 Data size: 3402 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 8 Data size: 1701 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 1701 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 8 Data size: 1701 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1007,65 +736,47 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - sq_2:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '9') - type: boolean + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: bigint + expressions: _col1 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: bigint + keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1103,9 +814,6 @@ where b.key in (select key from src where src.key > '8') group by key, value having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) '8')))) (. (TOK_TABLE_OR_COL b) key))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)) (TOK_HAVING (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL s1) key) '9')) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s1) key)))) (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-7 depends on stages: Stage-2, Stage-5 , consists of Stage-6, Stage-3 @@ -1118,65 +826,47 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - sq_2:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '9') - type: boolean + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: bigint + expressions: _col1 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: bigint + keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1187,8 +877,7 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Map Join Operator condition map: @@ -1196,28 +885,17 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} {_col2} 1 - handleSkewJoin: false keys: - 0 [Column[_col2]] - 1 [Column[_col0]] + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Position of Big Table: 0 Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1234,36 +912,20 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col2 - type: bigint + key expressions: _col2 (type: bigint) sort order: + - Map-reduce partition columns: - expr: _col2 - type: bigint - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint - $INTNAME1 + Map-reduce partition columns: _col2 (type: bigint) + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: bigint + key expressions: _col0 (type: bigint) sort order: + - Map-reduce partition columns: - expr: _col0 - type: bigint - tag: 1 + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1271,24 +933,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 16 Data size: 3402 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 8 Data size: 1701 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 1701 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 8 Data size: 1701 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1304,105 +960,74 @@ STAGE PLANS: sq_1:src TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '8') - type: boolean + predicate: (key > '8') (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 {key} {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[_col0]] - Position of Big Table: 0 + 0 key (type: string) + 1 _col0 (type: string) Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Semi Join 0 to 1 condition expressions: 0 {key} {value} 1 - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[_col0]] + 0 key (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Position of Big Table: 0 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1428,9 +1053,6 @@ group by p_mfgr, p_name having p_name in (select first_value(p_name) over(partition by p_mfgr order by p_size) from part) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_name)) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL p_size)))) (TOK_GROUPBY (TOK_TABLE_OR_COL p_mfgr) (TOK_TABLE_OR_COL p_name)) (TOK_HAVING (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION first_value (TOK_TABLE_OR_COL p_name) (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL p_size)))))))))) (TOK_TABLE_OR_COL p_name))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-5 depends on stages: Stage-1, Stage-3 , consists of Stage-4, Stage-2 @@ -1442,46 +1064,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - sq_1:part + Map Operator Tree: TableScan alias: part + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ - Map-reduce partition columns: - expr: p_mfgr - type: string - tag: -1 - value expressions: - expr: p_name - type: string - expr: p_mfgr - type: string - expr: p_size - type: int + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE PTF Operator + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _wcol0 - type: string + expressions: _wcol0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1492,8 +1100,7 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - $INTNAME1 + Map Operator Tree: TableScan Map Join Operator condition map: @@ -1501,28 +1108,17 @@ STAGE PLANS: condition expressions: 0 {_col0} {_col1} {_col2} 1 - handleSkewJoin: false keys: - 0 [Column[_col1]] - 1 [Column[_col0]] + 0 _col1 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Position of Big Table: 0 Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: double + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1539,36 +1135,20 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: double + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) Reduce Operator Tree: Join Operator condition map: @@ -1576,24 +1156,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 8 Data size: 1745 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: double + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 1745 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 8 Data size: 1745 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1601,61 +1175,35 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - part + Map Operator Tree: TableScan alias: part + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: p_mfgr - type: string - expr: p_name - type: string - expr: p_size - type: int + expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) outputColumnNames: p_mfgr, p_name, p_size + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: avg(p_size) - bucketGroup: false - keys: - expr: p_mfgr - type: string - expr: p_name - type: string + aggregations: avg(p_size) + keys: p_mfgr (type: string), p_name (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: struct + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: avg(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/subquery_multiinsert.q.out ql/src/test/results/clientpositive/subquery_multiinsert.q.out index 72e5188..fdafea3 100644 --- ql/src/test/results/clientpositive/subquery_multiinsert.q.out +++ ql/src/test/results/clientpositive/subquery_multiinsert.q.out @@ -48,9 +48,6 @@ INSERT OVERWRITE TABLE src_5 where b.key not in ( select key from src s1 where s1.key > '2') order by key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_4))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL a) value)) (> (. (TOK_TABLE_OR_COL a) key) '9'))))) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_5))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL s1) key) '2')))) (. (TOK_TABLE_OR_COL b) key)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -65,71 +62,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string - expr: value - type: string + key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: - expr: key - type: string - expr: value - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - sq_1:a TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '9') - type: boolean + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: 1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -137,22 +105,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (1 = 1) - type: boolean + predicate: (1 = 1) (type: boolean) + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 15 Data size: 3093 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -174,22 +138,16 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - $INTNAME1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan Reduce Output Operator sort order: - tag: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -197,11 +155,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -209,47 +166,30 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - sq_2:s1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: s1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '2') - type: boolean + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -257,22 +197,17 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 34 Data size: 7032 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((1 = 1) and _col4 is null) - type: boolean + predicate: ((1 = 1) and _col4 is null) (type: boolean) + Statistics: Num rows: 8 Data size: 1654 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1654 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -280,25 +215,19 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 8 Data size: 1654 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 8 Data size: 1654 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 8 Data size: 1654 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -320,53 +249,44 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: - sq_2_notin_nullcheck:sq_2:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > '2') and key is null) - type: boolean + predicate: ((key > '2') and key is null) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 = 0) - type: boolean + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: bigint + keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/subquery_notexists.q.out ql/src/test/results/clientpositive/subquery_notexists.q.out index 3c1a609..7dda89f 100644 --- ql/src/test/results/clientpositive/subquery_notexists.q.out +++ ql/src/test/results/clientpositive/subquery_notexists.q.out @@ -18,9 +18,6 @@ where not exists where b.value = a.value and a.key = b.key and a.value > 'val_2' ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP exists) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL a) value)) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (> (. (TOK_TABLE_OR_COL a) value) 'val_2')))))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -28,58 +25,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: value - type: string - expr: key - type: string + key expressions: value (type: string), key (type: string) sort order: ++ - Map-reduce partition columns: - expr: value - type: string - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - sq_1:a + Map-reduce partition columns: value (type: string), key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (value > 'val_2') - type: boolean + predicate: (value > 'val_2') (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: _col1, _col2 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col1 (type: string), _col2 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col1 - type: string - expr: _col2 - type: string - tag: 1 - value expressions: - expr: _col2 - type: string + Map-reduce partition columns: _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -87,22 +58,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((1 = 1) and _col6 is null) - type: boolean + predicate: ((1 = 1) and _col6 is null) (type: boolean) + Statistics: Num rows: 7 Data size: 1443 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1443 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 7 Data size: 1443 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -271,9 +238,6 @@ where not exists where b.value = a.value and a.value > 'val_2' ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP exists) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL a) value)) (> (. (TOK_TABLE_OR_COL a) value) 'val_2')))))))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -282,61 +246,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - sq_1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (value > 'val_2') - type: boolean + predicate: (value > 'val_2') (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -344,38 +286,23 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: value (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 1 - value expressions: - expr: _col1 - type: string - b + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) TableScan - alias: b Reduce Output Operator - key expressions: - expr: value - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: value - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -383,22 +310,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((1 = 1) and _col5 is null) - type: boolean + predicate: ((1 = 1) and _col5 is null) (type: boolean) + Statistics: Num rows: 7 Data size: 1443 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1443 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 7 Data size: 1443 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/subquery_notexists_having.q.out ql/src/test/results/clientpositive/subquery_notexists_having.q.out index 0cc6106..d8d3d3d 100644 --- ql/src/test/results/clientpositive/subquery_notexists_having.q.out +++ ql/src/test/results/clientpositive/subquery_notexists_having.q.out @@ -20,9 +20,6 @@ having not exists where b.value = a.value and a.key = b.key and a.value > 'val_12' ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)) (TOK_HAVING (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP exists) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL a) value)) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (> (. (TOK_TABLE_OR_COL a) value) 'val_12')))))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -31,52 +28,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -84,57 +61,30 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string - expr: _col0 - type: string + key expressions: _col1 (type: string), _col0 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col1 - type: string - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - sq_1:a + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (value > 'val_12') - type: boolean + predicate: (value > 'val_12') (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: value - type: string - expr: key - type: string + expressions: value (type: string), key (type: string) outputColumnNames: _col1, _col2 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col1 (type: string), _col2 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col1 - type: string - expr: _col2 - type: string - tag: 1 - value expressions: - expr: _col2 - type: string + Map-reduce partition columns: _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -142,22 +92,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col8 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((1 = 1) and _col8 is null) - type: boolean + predicate: ((1 = 1) and _col8 is null) (type: boolean) + Statistics: Num rows: 7 Data size: 1443 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1443 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 7 Data size: 1443 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -225,9 +171,6 @@ having not exists where b.value = a.value and a.value > 'val_12' ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)) (TOK_HAVING (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP exists) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL a) value)) (> (. (TOK_TABLE_OR_COL a) value) 'val_12')))))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -237,52 +180,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -290,37 +213,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - $INTNAME1 + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 1 - value expressions: - expr: _col1 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -328,22 +235,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((1 = 1) and _col6 is null) - type: boolean + predicate: ((1 = 1) and _col6 is null) (type: boolean) + Statistics: Num rows: 7 Data size: 1443 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1443 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 7 Data size: 1443 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -351,61 +254,39 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - sq_1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (value > 'val_12') - type: boolean + predicate: (value > 'val_12') (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/subquery_notin.q.out ql/src/test/results/clientpositive/subquery_notin.q.out index 15d9244..673a704 100644 --- ql/src/test/results/clientpositive/subquery_notin.q.out +++ ql/src/test/results/clientpositive/subquery_notin.q.out @@ -102,9 +102,6 @@ where src.key not in where s1.key > '2' ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL s1) key) '2')))) (. (TOK_TABLE_OR_COL src) key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -114,53 +111,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - sq_1_notin_nullcheck:sq_1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > '2') and key is null) - type: boolean + predicate: ((key > '2') and key is null) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 = 0) - type: boolean + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: bigint + keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -168,23 +156,18 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: 1 - src + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -192,11 +175,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -204,47 +186,30 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - sq_1:s1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: s1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '2') - type: boolean + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -252,22 +217,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 34 Data size: 7032 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((1 = 1) and _col4 is null) - type: boolean + predicate: ((1 = 1) and _col4 is null) (type: boolean) + Statistics: Num rows: 8 Data size: 1654 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1654 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 8 Data size: 1654 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -430,9 +391,6 @@ where b.p_name not in where r <= 2 and b.p_mfgr = a.p_mfgr ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) p_name)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size))) (TOK_WHERE (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_name)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size)) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL p_size)))))) r)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_name))) (TOK_WHERE (and (<= (TOK_TABLE_OR_COL r) 2) (= (. (TOK_TABLE_OR_COL b) p_mfgr) (. (TOK_TABLE_OR_COL a) p_mfgr)))))) (. (TOK_TABLE_OR_COL b) p_name)))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -444,45 +402,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - sq_1:a:part + Map Operator Tree: TableScan alias: part + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ - Map-reduce partition columns: - expr: p_mfgr - type: string - tag: -1 - value expressions: - expr: p_name - type: string - expr: p_mfgr - type: string - expr: p_size - type: int + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE PTF Operator + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_wcol0 <= 2) - type: boolean + predicate: (_wcol0 <= 2) (type: boolean) + Statistics: Num rows: 5 Data size: 1057 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 1057 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -490,47 +433,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: string - expr: _col2 - type: string + key expressions: _col1 (type: string), _col2 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col1 - type: string - expr: _col2 - type: string - tag: 0 - value expressions: - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: int - $INTNAME1 + Map-reduce partition columns: _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 5 Data size: 1057 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -538,24 +455,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} {VALUE._col2} {VALUE._col5} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col1, _col2, _col5, _col11 + Statistics: Num rows: 17 Data size: 3839 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((1 = 1) and _col11 is null) - type: boolean + predicate: ((1 = 1) and _col11 is null) (type: boolean) + Statistics: Num rows: 4 Data size: 903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col1 - type: string - expr: _col5 - type: int + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 903 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 903 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -563,45 +474,33 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - sq_1_notin_nullcheck:sq_1:a:part + Map Operator Tree: TableScan alias: part + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ - Map-reduce partition columns: - expr: p_mfgr - type: string - tag: -1 - value expressions: - expr: p_name - type: string - expr: p_mfgr - type: string - expr: p_size - type: int + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE PTF Operator + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((_wcol0 <= 2) and (_col1 is null or _col2 is null)) - type: boolean + predicate: ((_wcol0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean) + Statistics: Num rows: 4 Data size: 846 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 4 Data size: 846 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -609,41 +508,32 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 = 0) - type: boolean + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: bigint + keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -651,25 +541,18 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: 1 - b + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) TableScan - alias: b Reduce Output Operator sort order: - tag: 0 - value expressions: - expr: p_name - type: string - expr: p_mfgr - type: string - expr: p_size - type: int + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -677,11 +560,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} {VALUE._col2} {VALUE._col5} 1 - handleSkewJoin: false outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -749,9 +631,6 @@ part where part.p_size not in where r <= 2 ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_name)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size))) (TOK_WHERE (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_size)) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL p_size)))))) r)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL p_size)))) (TOK_WHERE (<= (TOK_TABLE_OR_COL r) 2)))) (. (TOK_TABLE_OR_COL part) p_size)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -764,47 +643,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - sq_1:a:part + Map Operator Tree: TableScan alias: part + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ - Map-reduce partition columns: - expr: p_mfgr - type: string - tag: -1 - value expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_size (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE PTF Operator + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_wcol0 <= 2) - type: boolean + predicate: (_wcol0 <= 2) (type: boolean) + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col5 - type: int + expressions: _col5 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: avg(_col0) - bucketGroup: false + aggregations: avg(_col0) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -812,30 +679,24 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: avg(VALUE._col0) - bucketGroup: false + aggregations: avg(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double + expressions: _col0 (type: double) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -843,37 +704,21 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - Map-reduce partition columns: - expr: _col0 - type: double - tag: 1 - value expressions: - expr: _col0 - type: double - $INTNAME1 + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) TableScan Reduce Output Operator - key expressions: - expr: UDFToDouble(_col5) - type: double + key expressions: UDFToDouble(_col5) (type: double) sort order: + - Map-reduce partition columns: - expr: UDFToDouble(_col5) - type: double - tag: 0 - value expressions: - expr: _col1 - type: string - expr: _col5 - type: int + Map-reduce partition columns: UDFToDouble(_col5) (type: double) + Statistics: Num rows: 33 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -881,22 +726,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} {VALUE._col5} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col1, _col5, _col11 + Statistics: Num rows: 36 Data size: 3839 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((1 = 1) and _col11 is null) - type: boolean + predicate: ((1 = 1) and _col11 is null) (type: boolean) + Statistics: Num rows: 9 Data size: 959 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col5 - type: int + expressions: _col1 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 959 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 959 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -904,47 +745,35 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - sq_1_notin_nullcheck:sq_1:a:part + Map Operator Tree: TableScan alias: part + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ - Map-reduce partition columns: - expr: p_mfgr - type: string - tag: -1 - value expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_size (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE PTF Operator + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_wcol0 <= 2) - type: boolean + predicate: (_wcol0 <= 2) (type: boolean) + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col5 - type: int + expressions: _col5 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: avg(_col0) - bucketGroup: false + aggregations: avg(_col0) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -952,52 +781,42 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: avg(VALUE._col0) - bucketGroup: false + aggregations: avg(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: _col0 is null - type: boolean + predicate: _col0 is null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: complete outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 = 0) - type: boolean + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: bigint + keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1005,23 +824,18 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: 1 - part + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan alias: part + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: 0 - value expressions: - expr: p_name - type: string - expr: p_size - type: int + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_size (type: int) Reduce Operator Tree: Join Operator condition map: @@ -1029,11 +843,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} {VALUE._col5} 1 - handleSkewJoin: false outputColumnNames: _col1, _col5 + Statistics: Num rows: 33 Data size: 3490 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1107,9 +920,6 @@ from part b where b.p_size not in where r <= 2 and b.p_mfgr = a.p_mfgr ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_name)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size))) (TOK_WHERE (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size)) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL p_size)))))) r)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL p_size)))) (TOK_WHERE (and (<= (TOK_TABLE_OR_COL r) 2) (= (. (TOK_TABLE_OR_COL b) p_mfgr) (. (TOK_TABLE_OR_COL a) p_mfgr)))))) (. (TOK_TABLE_OR_COL b) p_size)))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 @@ -1123,52 +933,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - sq_1:a:part + Map Operator Tree: TableScan alias: part + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ - Map-reduce partition columns: - expr: p_mfgr - type: string - tag: -1 - value expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_size (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE PTF Operator + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_wcol0 <= 2) - type: boolean + predicate: (_wcol0 <= 2) (type: boolean) + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col5 - type: int + expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: min(_col1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: min(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1176,41 +970,27 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: int + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Reduce Operator Tree: Group By Operator - aggregations: - expr: min(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: int - expr: _col0 - type: string + expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1218,47 +998,21 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col5 - type: int - expr: _col2 - type: string + key expressions: _col5 (type: int), _col2 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col5 - type: int - expr: _col2 - type: string - tag: 0 - value expressions: - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col5 - type: int - $INTNAME1 + Map-reduce partition columns: _col5 (type: int), _col2 (type: string) + Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: string - tag: 1 - value expressions: - expr: _col0 - type: int + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -1266,24 +1020,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} {VALUE._col2} {VALUE._col5} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col1, _col2, _col5, _col11 + Statistics: Num rows: 17 Data size: 3839 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((1 = 1) and _col11 is null) - type: boolean + predicate: ((1 = 1) and _col11 is null) (type: boolean) + Statistics: Num rows: 4 Data size: 903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col1 - type: string - expr: _col5 - type: int + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 903 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 903 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1291,52 +1039,36 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - sq_1_notin_nullcheck:sq_1:a:part + Map Operator Tree: TableScan alias: part + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ - Map-reduce partition columns: - expr: p_mfgr - type: string - tag: -1 - value expressions: - expr: p_mfgr - type: string - expr: p_size - type: int + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_size (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE PTF Operator + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_wcol0 <= 2) - type: boolean + predicate: (_wcol0 <= 2) (type: boolean) + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: string - expr: _col5 - type: int + expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: min(_col1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: min(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1344,45 +1076,33 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: int + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Reduce Operator Tree: Group By Operator - aggregations: - expr: min(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col1 is null or _col0 is null) - type: boolean + predicate: (_col1 is null or _col0 is null) (type: boolean) + Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1390,41 +1110,32 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 = 0) - type: boolean + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: bigint + keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1432,25 +1143,18 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: 1 - b + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) TableScan - alias: b Reduce Output Operator sort order: - tag: 0 - value expressions: - expr: p_name - type: string - expr: p_mfgr - type: string - expr: p_size - type: int + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1458,11 +1162,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col1} {VALUE._col2} {VALUE._col5} 1 - handleSkewJoin: false outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1707,9 +1410,6 @@ POSTHOOK: query: explain select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1_v))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2_v))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL T2_v) key))))) (. (TOK_TABLE_OR_COL T1_v) key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -1719,62 +1419,51 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - sq_1_notin_nullcheck:sq_1:t2_v:t1_v:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < '11') - type: boolean + predicate: (key < '11') (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END - type: string + expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string) outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: _col0 is null - type: boolean + predicate: _col0 is null (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 = 0) - type: boolean + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: bigint + keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1782,30 +1471,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: 1 - t1_v:src + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < '11') - type: boolean + predicate: (key < '11') (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: 0 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1813,11 +1497,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 2093 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1825,45 +1508,30 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - sq_1:t2_v:t1_v:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < '11') - type: boolean + predicate: (key < '11') (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END - type: string + expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string) outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1871,20 +1539,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 2302 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((1 = 1) and _col1 is null) - type: boolean + predicate: ((1 = 1) and _col1 is null) (type: boolean) + Statistics: Num rows: 5 Data size: 523 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 523 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 5 Data size: 523 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/subquery_notin_having.q.out ql/src/test/results/clientpositive/subquery_notin_having.q.out index 8c4fe56..5f4d96e 100644 --- ql/src/test/results/clientpositive/subquery_notin_having.q.out +++ ql/src/test/results/clientpositive/subquery_notin_having.q.out @@ -55,9 +55,6 @@ having key not in where s1.key > '12' ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_HAVING (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL s1) key) '12')))) (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-5 @@ -68,53 +65,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - sq_1_notin_nullcheck:sq_1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key > '12') and key is null) - type: boolean + predicate: ((key > '12') and key is null) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 = 0) - type: boolean + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: bigint + keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -122,22 +110,16 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: 1 - $INTNAME1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan Reduce Output Operator sort order: - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -145,11 +127,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -157,47 +138,30 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - sq_1:s1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan alias: s1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > '12') - type: boolean + predicate: (key > '12') (type: boolean) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -205,22 +169,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((1 = 1) and _col4 is null) - type: boolean + predicate: ((1 = 1) and _col4 is null) (type: boolean) + Statistics: Num rows: 17 Data size: 1732 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 1732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 17 Data size: 1732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -228,49 +188,35 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -302,9 +248,6 @@ having b.p_mfgr not in where min(p_retailprice) = l and r - l > 600 ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) p_mfgr)) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL p_retailprice)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) p_mfgr)) (TOK_HAVING (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL p_retailprice)) l) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL p_retailprice)) r) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL p_retailprice)) a)) (TOK_GROUPBY (TOK_TABLE_OR_COL p_mfgr)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr))) (TOK_WHERE (and (= (TOK_FUNCTION min (TOK_TABLE_OR_COL p_retailprice)) (TOK_TABLE_OR_COL l)) (> (- (TOK_TABLE_OR_COL r) (TOK_TABLE_OR_COL l)) 600))))) (. (TOK_TABLE_OR_COL b) p_mfgr)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-6 @@ -317,59 +260,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: p_mfgr - type: string - expr: p_retailprice - type: double + expressions: p_mfgr (type: string), p_retailprice (type: double) outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: min(p_retailprice) - expr: max(p_retailprice) - expr: avg(p_retailprice) - bucketGroup: false - keys: - expr: p_mfgr - type: string + aggregations: min(p_retailprice), max(p_retailprice), avg(p_retailprice) + keys: p_mfgr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double - expr: _col2 - type: double - expr: _col3 - type: struct + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: min(VALUE._col0) - expr: max(VALUE._col1) - expr: avg(VALUE._col2) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: min(VALUE._col0), max(VALUE._col1), avg(VALUE._col2) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -377,24 +296,16 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: double - expr: _col1 - type: double - $INTNAME1 + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double), _col1 (type: double) TableScan Reduce Output Operator sort order: - tag: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -402,11 +313,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 31 Data size: 3490 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -414,45 +324,21 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col5 - type: double + key expressions: _col0 (type: string), _col5 (type: double) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col5 - type: double - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: double - $INTNAME1 + Map-reduce partition columns: _col0 (type: string), _col5 (type: double) + Statistics: Num rows: 31 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: double + key expressions: _col0 (type: string), _col1 (type: double) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: double - tag: 1 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col0 (type: string), _col1 (type: double) + Statistics: Num rows: 4 Data size: 437 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -460,22 +346,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 34 Data size: 3839 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((1 = 1) and _col7 is null) - type: boolean + predicate: ((1 = 1) and _col7 is null) (type: boolean) + Statistics: Num rows: 8 Data size: 903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 903 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 8 Data size: 903 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -483,70 +365,42 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - sq_1:a:part + Map Operator Tree: TableScan alias: part + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: p_mfgr - type: string - expr: p_retailprice - type: double + expressions: p_mfgr (type: string), p_retailprice (type: double) outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: min(p_retailprice) - expr: max(p_retailprice) - expr: avg(p_retailprice) - bucketGroup: false - keys: - expr: p_mfgr - type: string + aggregations: min(p_retailprice), max(p_retailprice), avg(p_retailprice) + keys: p_mfgr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double - expr: _col2 - type: double - expr: _col3 - type: struct + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: min(VALUE._col0) - expr: max(VALUE._col1) - expr: avg(VALUE._col2) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: min(VALUE._col0), max(VALUE._col1), avg(VALUE._col2) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 1531 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((_col2 - _col1) > 600) - type: boolean + predicate: ((_col2 - _col1) > 600) (type: boolean) + Statistics: Num rows: 4 Data size: 437 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 437 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -554,70 +408,45 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - sq_1_notin_nullcheck:sq_1:a:part + Map Operator Tree: TableScan alias: part + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: p_mfgr - type: string - expr: p_retailprice - type: double + expressions: p_mfgr (type: string), p_retailprice (type: double) outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: min(p_retailprice) - expr: max(p_retailprice) - expr: avg(p_retailprice) - bucketGroup: false - keys: - expr: p_mfgr - type: string + aggregations: min(p_retailprice), max(p_retailprice), avg(p_retailprice) + keys: p_mfgr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double - expr: _col2 - type: double - expr: _col3 - type: struct + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: min(VALUE._col0) - expr: max(VALUE._col1) - expr: avg(VALUE._col2) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: min(VALUE._col0), max(VALUE._col1), avg(VALUE._col2) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 1531 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((_col2 - _col1) > 600) and (_col0 is null or _col1 is null)) - type: boolean + predicate: (((_col2 - _col1) > 600) and (_col0 is null or _col1 is null)) (type: boolean) + Statistics: Num rows: 2 Data size: 218 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 2 Data size: 218 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -625,41 +454,32 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 = 0) - type: boolean + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: bigint + keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -717,9 +537,6 @@ having b.p_mfgr not in having max(p_retailprice) - min(p_retailprice) > 600 ) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) p_mfgr)) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL p_retailprice)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) p_mfgr)) (TOK_HAVING (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr))) (TOK_GROUPBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_HAVING (> (- (TOK_FUNCTION max (TOK_TABLE_OR_COL p_retailprice)) (TOK_FUNCTION min (TOK_TABLE_OR_COL p_retailprice))) 600)))) (. (TOK_TABLE_OR_COL b) p_mfgr)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-5 @@ -732,55 +549,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: p_mfgr - type: string - expr: p_retailprice - type: double + expressions: p_mfgr (type: string), p_retailprice (type: double) outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: min(p_retailprice) - expr: max(p_retailprice) - bucketGroup: false - keys: - expr: p_mfgr - type: string + aggregations: min(p_retailprice), max(p_retailprice) + keys: p_mfgr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double - expr: _col2 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: min(VALUE._col0) - expr: max(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: min(VALUE._col0), max(VALUE._col1) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -788,22 +585,16 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: double - $INTNAME1 + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) TableScan Reduce Output Operator sort order: - tag: 1 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -811,11 +602,10 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 3490 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -823,37 +613,21 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - $INTNAME + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: double - $INTNAME1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 437 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -861,22 +635,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 34 Data size: 3839 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((1 = 1) and _col5 is null) - type: boolean + predicate: ((1 = 1) and _col5 is null) (type: boolean) + Statistics: Num rows: 8 Data size: 903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: double + expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 903 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 8 Data size: 903 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -884,70 +654,48 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - sq_1_notin_nullcheck:sq_1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: p_mfgr is null - type: boolean + predicate: p_mfgr is null (type: boolean) + Statistics: Num rows: 14 Data size: 1531 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: p_mfgr - type: string - expr: p_retailprice - type: double + expressions: p_mfgr (type: string), p_retailprice (type: double) outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 14 Data size: 1531 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: max(p_retailprice) - expr: min(p_retailprice) - bucketGroup: false - keys: - expr: p_mfgr - type: string + aggregations: max(p_retailprice), min(p_retailprice) + keys: p_mfgr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1531 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double - expr: _col2 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 1531 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - expr: min(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: max(VALUE._col0), min(VALUE._col1) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 765 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((_col1 - _col2) > 600) and _col0 is null) - type: boolean + predicate: (((_col1 - _col2) > 600) and _col0 is null) (type: boolean) + Statistics: Num rows: 1 Data size: 109 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 1 Data size: 109 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -955,41 +703,32 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (_col0 = 0) - type: boolean + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: bigint + keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -997,64 +736,42 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - sq_1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: p_mfgr - type: string - expr: p_retailprice - type: double + expressions: p_mfgr (type: string), p_retailprice (type: double) outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: max(p_retailprice) - expr: min(p_retailprice) - bucketGroup: false - keys: - expr: p_mfgr - type: string + aggregations: max(p_retailprice), min(p_retailprice) + keys: p_mfgr (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: double - expr: _col2 - type: double + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - expr: min(VALUE._col1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: max(VALUE._col0), min(VALUE._col1) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1531 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((_col1 - _col2) > 600) - type: boolean + predicate: ((_col1 - _col2) > 600) (type: boolean) + Statistics: Num rows: 4 Data size: 437 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 437 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/symlink_text_input_format.q.out ql/src/test/results/clientpositive/symlink_text_input_format.q.out index fa90b76..c33c863 100644 --- ql/src/test/results/clientpositive/symlink_text_input_format.q.out +++ ql/src/test/results/clientpositive/symlink_text_input_format.q.out @@ -8,9 +8,6 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: EXPLAIN CREATE TABLE symlink_text_input_format (key STRING, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat' POSTHOOK: type: CREATETABLE -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME symlink_text_input_format) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL key TOK_STRING) (TOK_TABCOL value TOK_STRING)) (TOK_TABLEFILEFORMAT 'org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat' 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat')) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -19,12 +16,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: key string, value string - if not exists: false input format: org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: symlink_text_input_format - isExternal: false PREHOOK: query: CREATE TABLE symlink_text_input_format (key STRING, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat' PREHOOK: type: CREATETABLE @@ -35,9 +29,6 @@ PREHOOK: query: EXPLAIN SELECT * FROM symlink_text_input_format order by key, va PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM symlink_text_input_format order by key, value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME symlink_text_input_format))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -45,35 +36,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - symlink_text_input_format + Map Operator Tree: TableScan alias: symlink_text_input_format + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -111,9 +92,6 @@ PREHOOK: query: EXPLAIN SELECT value FROM symlink_text_input_format order by val PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT value FROM symlink_text_input_format order by value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME symlink_text_input_format))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -121,29 +99,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - symlink_text_input_format + Map Operator Tree: TableScan alias: symlink_text_input_format + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: value - type: string + expressions: value (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -181,9 +155,6 @@ PREHOOK: query: EXPLAIN SELECT count(1) FROM symlink_text_input_format PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT count(1) FROM symlink_text_input_format POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME symlink_text_input_format))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -191,38 +162,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - symlink_text_input_format + Map Operator Tree: TableScan alias: symlink_text_input_format + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/transform1.q.out ql/src/test/results/clientpositive/transform1.q.out index 4530d2c..7ce3b85 100644 --- ql/src/test/results/clientpositive/transform1.q.out +++ ql/src/test/results/clientpositive/transform1.q.out @@ -9,9 +9,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT transform(*) USING 'cat' AS (col array) FROM transform1_t1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME transform1_t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST TOK_ALLCOLREF) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_TABCOLLIST (TOK_TABCOL col (TOK_LIST TOK_BIGINT)))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -19,26 +16,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - transform1_t1 + Map Operator Tree: TableScan alias: transform1_t1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: a - type: string - expr: b - type: string + expressions: a (type: string), b (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -79,9 +74,6 @@ POSTHOOK: query: EXPLAIN SELECT transform('0\0021\0022') USING 'cat' AS (col array) FROM transform1_t2 POSTHOOK: type: QUERY POSTHOOK: Lineage: transform1_t2.col EXPRESSION [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME transform1_t2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST '0\0021\0022') TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_TABCOLLIST (TOK_TABCOL col (TOK_LIST TOK_INT)))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -89,24 +81,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - transform1_t2 + Map Operator Tree: TableScan alias: transform1_t2 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: '012' - type: string + expressions: '012' (type: string) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/transform_ppr1.q.out ql/src/test/results/clientpositive/transform_ppr1.q.out index bb7ca29..9c537f0 100644 --- ql/src/test/results/clientpositive/transform_ppr1.q.out +++ ql/src/test/results/clientpositive/transform_ppr1.q.out @@ -17,7 +17,79 @@ FROM ( SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 AND tmap.ds = '2008-04-08' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) ds) (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST ds tkey tvalue)))) (TOK_CLUSTERBY (TOK_TABLE_OR_COL tkey)))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) tkey)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) tvalue))) (TOK_WHERE (AND (< (. (TOK_TABLE_OR_COL tmap) tkey) 100) (= (. (TOK_TABLE_OR_COL tmap) ds) '2008-04-08'))))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TRANSFORM + TOK_EXPLIST + . + TOK_TABLE_OR_COL + src + ds + . + TOK_TABLE_OR_COL + src + key + . + TOK_TABLE_OR_COL + src + value + TOK_SERDE + TOK_RECORDWRITER + 'cat' + TOK_SERDE + TOK_RECORDREADER + TOK_ALIASLIST + ds + tkey + tvalue + TOK_CLUSTERBY + TOK_TABLE_OR_COL + tkey + tmap + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + tmap + tkey + TOK_SELEXPR + . + TOK_TABLE_OR_COL + tmap + tvalue + TOK_WHERE + AND + < + . + TOK_TABLE_OR_COL + tmap + tkey + 100 + = + . + TOK_TABLE_OR_COL + tmap + ds + '2008-04-08' + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -26,24 +98,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 116 dataSize: 23248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: ds - type: string - expr: key - type: string - expr: value - type: string + expressions: ds (type: string), key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 116 dataSize: 23248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: @@ -56,33 +119,18 @@ STAGE PLANS: serialization.format 9 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Statistics: - numRows: 116 dataSize: 23248 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: ((_col1 < 100) and (_col0 = '2008-04-08')) - type: boolean - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((_col1 < 100) and (_col0 = '2008-04-08')) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -262,24 +310,17 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/transform_ppr2.q.out ql/src/test/results/clientpositive/transform_ppr2.q.out index 33fe33e..4bddc69 100644 --- ql/src/test/results/clientpositive/transform_ppr2.q.out +++ ql/src/test/results/clientpositive/transform_ppr2.q.out @@ -19,7 +19,79 @@ FROM ( SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL src) ds) (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src) value)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST ds tkey tvalue)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) ds) '2008-04-08')) (TOK_CLUSTERBY (TOK_TABLE_OR_COL tkey)))) tmap)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) tkey)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmap) tvalue))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL tmap) tkey) 100)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TRANSFORM + TOK_EXPLIST + . + TOK_TABLE_OR_COL + src + ds + . + TOK_TABLE_OR_COL + src + key + . + TOK_TABLE_OR_COL + src + value + TOK_SERDE + TOK_RECORDWRITER + 'cat' + TOK_SERDE + TOK_RECORDREADER + TOK_ALIASLIST + ds + tkey + tvalue + TOK_WHERE + = + . + TOK_TABLE_OR_COL + src + ds + '2008-04-08' + TOK_CLUSTERBY + TOK_TABLE_OR_COL + tkey + tmap + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + tmap + tkey + TOK_SELEXPR + . + TOK_TABLE_OR_COL + tmap + tvalue + TOK_WHERE + < + . + TOK_TABLE_OR_COL + tmap + tkey + 100 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -28,24 +100,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - tmap:src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: ds - type: string - expr: key - type: string - expr: value - type: string + expressions: ds (type: string), key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: @@ -58,33 +121,18 @@ STAGE PLANS: serialization.format 9 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: - expr: (_col1 < 100) - type: boolean - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + predicate: (_col1 < 100) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -178,24 +226,17 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/truncate_table.q.out ql/src/test/results/clientpositive/truncate_table.q.out index 9c864ea..673cf3b 100644 --- ql/src/test/results/clientpositive/truncate_table.q.out +++ ql/src/test/results/clientpositive/truncate_table.q.out @@ -72,9 +72,6 @@ PREHOOK: type: TRUNCATETABLE POSTHOOK: query: -- truncate non-partitioned table explain TRUNCATE TABLE src_truncate POSTHOOK: type: TRUNCATETABLE -ABSTRACT SYNTAX TREE: - (TOK_TRUNCATETABLE (TOK_TABLE_PARTITION src_truncate)) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -104,9 +101,6 @@ PREHOOK: type: TRUNCATETABLE POSTHOOK: query: -- truncate a partition explain TRUNCATE TABLE srcpart_truncate partition (ds='2008-04-08', hr='11') POSTHOOK: type: TRUNCATETABLE -ABSTRACT SYNTAX TREE: - (TOK_TRUNCATETABLE (TOK_TABLE_PARTITION srcpart_truncate (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr '11')))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -141,9 +135,6 @@ PREHOOK: type: TRUNCATETABLE POSTHOOK: query: -- truncate partitions with partial spec explain TRUNCATE TABLE srcpart_truncate partition (ds, hr='12') POSTHOOK: type: TRUNCATETABLE -ABSTRACT SYNTAX TREE: - (TOK_TRUNCATETABLE (TOK_TABLE_PARTITION srcpart_truncate (TOK_PARTSPEC (TOK_PARTVAL ds) (TOK_PARTVAL hr '12')))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -182,9 +173,6 @@ PREHOOK: type: TRUNCATETABLE POSTHOOK: query: -- truncate partitioned table explain TRUNCATE TABLE srcpart_truncate POSTHOOK: type: TRUNCATETABLE -ABSTRACT SYNTAX TREE: - (TOK_TRUNCATETABLE (TOK_TABLE_PARTITION srcpart_truncate)) - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/type_cast_1.q.out ql/src/test/results/clientpositive/type_cast_1.q.out index eeb4ab3..d337727 100644 --- ql/src/test/results/clientpositive/type_cast_1.q.out +++ ql/src/test/results/clientpositive/type_cast_1.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT IF(false, 1, cast(2 as smallint)) + 3 FROM src LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_FUNCTION IF false 1 (TOK_FUNCTION TOK_SMALLINT 2)) 3))) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -17,12 +14,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: (if(false, 1, UDFToShort(2)) + 3) - type: int + expressions: (if(false, 1, UDFToShort(2)) + 3) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT IF(false, 1, cast(2 as smallint)) + 3 FROM src LIMIT 1 diff --git ql/src/test/results/clientpositive/type_widening.q.out ql/src/test/results/clientpositive/type_widening.q.out index 513ad4d..fd833f1 100644 --- ql/src/test/results/clientpositive/type_widening.q.out +++ ql/src/test/results/clientpositive/type_widening.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- Check for int, bigint automatic type widening conversions in UDFs, UNIONS EXPLAIN SELECT COALESCE(0, 9223372036854775807) FROM src LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COALESCE 0 9223372036854775807))) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -17,12 +14,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: COALESCE(0,9223372036854775807) - type: bigint + expressions: COALESCE(0,9223372036854775807) (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT COALESCE(0, 9223372036854775807) FROM src LIMIT 1 @@ -38,9 +37,6 @@ PREHOOK: query: EXPLAIN SELECT * FROM (SELECT 0 AS numcol FROM src UNION ALL SEL PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM (SELECT 0 AS numcol FROM src UNION ALL SELECT 9223372036854775807 AS numcol FROM src) a ORDER BY numcol POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 0 numcol)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 9223372036854775807 numcol))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL numcol))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -48,58 +44,49 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:a-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: UDFToLong(0) - type: bigint + expressions: UDFToLong(0) (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Union + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: bigint + key expressions: _col0 (type: bigint) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery2:a-subquery2:src + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: 9223372036854775807 - type: bigint + expressions: 9223372036854775807 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Union + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: bigint + key expressions: _col0 (type: bigint) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udaf_number_format.q.out ql/src/test/results/clientpositive/udaf_number_format.q.out index 2480c9e..339ef94 100644 --- ql/src/test/results/clientpositive/udaf_number_format.q.out +++ ql/src/test/results/clientpositive/udaf_number_format.q.out @@ -12,9 +12,6 @@ POSTHOOK: query: EXPLAIN SELECT std('a') FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum 'a')) (TOK_SELEXPR (TOK_FUNCTION avg 'a')) (TOK_SELEXPR (TOK_FUNCTION variance 'a')) (TOK_SELEXPR (TOK_FUNCTION std 'a'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -22,56 +19,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: sum('a') - expr: avg('a') - expr: variance('a') - expr: std('a') - bucketGroup: false + aggregations: sum('a'), avg('a'), variance('a'), std('a') mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: double - expr: _col1 - type: struct - expr: _col2 - type: struct - expr: _col3 - type: struct + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - expr: avg(VALUE._col1) - expr: variance(VALUE._col2) - expr: std(VALUE._col3) - bucketGroup: false + aggregations: sum(VALUE._col0), avg(VALUE._col1), variance(VALUE._col2), std(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: double - expr: _col2 - type: double - expr: _col3 - type: double + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf1.q.out ql/src/test/results/clientpositive/udf1.q.out index 79ce5a7..89014b6 100644 --- ql/src/test/results/clientpositive/udf1.q.out +++ ql/src/test/results/clientpositive/udf1.q.out @@ -29,9 +29,6 @@ FROM src INSERT OVERWRITE TABLE dest1 SELECT 'a' LIKE '%a%', 'b' LIKE '%a%', 'ab REGEXP_REPLACE('abc', '', 'A'), 'abc' RLIKE '' WHERE src.key = 86 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (LIKE 'a' '%a%')) (TOK_SELEXPR (LIKE 'b' '%a%')) (TOK_SELEXPR (LIKE 'ab' '%a%')) (TOK_SELEXPR (LIKE 'ab' '%a_')) (TOK_SELEXPR (LIKE '%_' '\%\_')) (TOK_SELEXPR (LIKE 'ab' '\%\_')) (TOK_SELEXPR (LIKE 'ab' '_a%')) (TOK_SELEXPR (LIKE 'ab' 'a')) (TOK_SELEXPR (RLIKE '' '.*')) (TOK_SELEXPR (RLIKE 'a' '[ab]')) (TOK_SELEXPR (RLIKE '' '[ab]')) (TOK_SELEXPR (RLIKE 'hadoop' '[a-z]*')) (TOK_SELEXPR (RLIKE 'hadoop' 'o*')) (TOK_SELEXPR (TOK_FUNCTION REGEXP_REPLACE 'abc' 'b' 'c')) (TOK_SELEXPR (TOK_FUNCTION REGEXP_REPLACE 'abc' 'z' 'a')) (TOK_SELEXPR (TOK_FUNCTION REGEXP_REPLACE 'abbbb' 'bb' 'b')) (TOK_SELEXPR (TOK_FUNCTION REGEXP_REPLACE 'hadoop' '(.)[a-z]*' '$1ive')) (TOK_SELEXPR (TOK_FUNCTION REGEXP_REPLACE 'hadoopAAA' 'A.*' '')) (TOK_SELEXPR (TOK_FUNCTION REGEXP_REPLACE 'abc' '' 'A')) (TOK_SELEXPR (RLIKE 'abc' ''))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) key) 86)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -45,60 +42,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: ('a' like '%a%') - type: boolean - expr: ('b' like '%a%') - type: boolean - expr: ('ab' like '%a%') - type: boolean - expr: ('ab' like '%a_') - type: boolean - expr: ('%_' like '\%\_') - type: boolean - expr: ('ab' like '\%\_') - type: boolean - expr: ('ab' like '_a%') - type: boolean - expr: ('ab' like 'a') - type: boolean - expr: ('' rlike '.*') - type: boolean - expr: ('a' rlike '[ab]') - type: boolean - expr: ('' rlike '[ab]') - type: boolean - expr: ('hadoop' rlike '[a-z]*') - type: boolean - expr: ('hadoop' rlike 'o*') - type: boolean - expr: regexp_replace('abc', 'b', 'c') - type: string - expr: regexp_replace('abc', 'z', 'a') - type: string - expr: regexp_replace('abbbb', 'bb', 'b') - type: string - expr: regexp_replace('hadoop', '(.)[a-z]*', '$1ive') - type: string - expr: regexp_replace('hadoopAAA', 'A.*', '') - type: string - expr: regexp_replace('abc', '', 'A') - type: string - expr: ('abc' rlike '') - type: boolean + expressions: ('a' like '%a%') (type: boolean), ('b' like '%a%') (type: boolean), ('ab' like '%a%') (type: boolean), ('ab' like '%a_') (type: boolean), ('%_' like '\%\_') (type: boolean), ('ab' like '\%\_') (type: boolean), ('ab' like '_a%') (type: boolean), ('ab' like 'a') (type: boolean), ('' rlike '.*') (type: boolean), ('a' rlike '[ab]') (type: boolean), ('' rlike '[ab]') (type: boolean), ('hadoop' rlike '[a-z]*') (type: boolean), ('hadoop' rlike 'o*') (type: boolean), regexp_replace('abc', 'b', 'c') (type: string), regexp_replace('abc', 'z', 'a') (type: string), regexp_replace('abbbb', 'bb', 'b') (type: string), regexp_replace('hadoop', '(.)[a-z]*', '$1ive') (type: string), regexp_replace('hadoopAAA', 'A.*', '') (type: string), regexp_replace('abc', '', 'A') (type: string), ('abc' rlike '') (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -129,12 +86,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -143,12 +98,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf2.q.out ql/src/test/results/clientpositive/udf2.q.out index 10f18f9..ca7ba74 100644 --- ql/src/test/results/clientpositive/udf2.q.out +++ ql/src/test/results/clientpositive/udf2.q.out @@ -19,9 +19,6 @@ POSTHOOK: query: EXPLAIN SELECT '|', trim(dest1.c1), '|', rtrim(dest1.c1), '|', ltrim(dest1.c1), '|' FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.c1 SIMPLE [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR '|') (TOK_SELEXPR (TOK_FUNCTION trim (. (TOK_TABLE_OR_COL dest1) c1))) (TOK_SELEXPR '|') (TOK_SELEXPR (TOK_FUNCTION rtrim (. (TOK_TABLE_OR_COL dest1) c1))) (TOK_SELEXPR '|') (TOK_SELEXPR (TOK_FUNCTION ltrim (. (TOK_TABLE_OR_COL dest1) c1))) (TOK_SELEXPR '|')))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -29,30 +26,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - dest1 + Map Operator Tree: TableScan alias: dest1 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: '|' - type: string - expr: trim(c1) - type: string - expr: '|' - type: string - expr: rtrim(c1) - type: string - expr: '|' - type: string - expr: ltrim(c1) - type: string - expr: '|' - type: string + expressions: '|' (type: string), trim(c1) (type: string), '|' (type: string), rtrim(c1) (type: string), '|' (type: string), ltrim(c1) (type: string), '|' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf3.q.out ql/src/test/results/clientpositive/udf3.q.out index c0053ab..546f949 100644 --- ql/src/test/results/clientpositive/udf3.q.out +++ ql/src/test/results/clientpositive/udf3.q.out @@ -11,9 +11,6 @@ POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE dest1 SELECT count(CAST('' AS INT)), sum(CAST('' AS INT)), avg(CAST('' AS INT)), min(CAST('' AS INT)), max(CAST('' AS INT)) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count (TOK_FUNCTION TOK_INT ''))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION TOK_INT ''))) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_FUNCTION TOK_INT ''))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_FUNCTION TOK_INT ''))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_FUNCTION TOK_INT '')))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -22,62 +19,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(UDFToInteger('')) - expr: sum(UDFToInteger('')) - expr: avg(UDFToInteger('')) - expr: min(UDFToInteger('')) - expr: max(UDFToInteger('')) - bucketGroup: false + aggregations: count(UDFToInteger('')), sum(UDFToInteger('')), avg(UDFToInteger('')), min(UDFToInteger('')), max(UDFToInteger('')) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: struct - expr: _col3 - type: int - expr: _col4 - type: int + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: struct), _col3 (type: int), _col4 (type: int) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: sum(VALUE._col1) - expr: avg(VALUE._col2) - expr: min(VALUE._col3) - expr: max(VALUE._col4) - bucketGroup: false + aggregations: count(VALUE._col0), sum(VALUE._col1), avg(VALUE._col2), min(VALUE._col3), max(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint - expr: _col2 - type: double - expr: _col3 - type: int - expr: _col4 - type: int + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: int), _col4 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf4.q.out ql/src/test/results/clientpositive/udf4.q.out index c30560a..99c4591 100644 --- ql/src/test/results/clientpositive/udf4.q.out +++ ql/src/test/results/clientpositive/udf4.q.out @@ -63,9 +63,6 @@ CAST(1 AS BIGINT) ^ CAST(3 AS BIGINT) FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.c1 SIMPLE [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION round 1.0)) (TOK_SELEXPR (TOK_FUNCTION round 1.5)) (TOK_SELEXPR (TOK_FUNCTION round (- 1.5))) (TOK_SELEXPR (TOK_FUNCTION floor 1.0)) (TOK_SELEXPR (TOK_FUNCTION floor 1.5)) (TOK_SELEXPR (TOK_FUNCTION floor (- 1.5))) (TOK_SELEXPR (TOK_FUNCTION sqrt 1.0)) (TOK_SELEXPR (TOK_FUNCTION sqrt (- 1.0))) (TOK_SELEXPR (TOK_FUNCTION sqrt 0.0)) (TOK_SELEXPR (TOK_FUNCTION ceil 1.0)) (TOK_SELEXPR (TOK_FUNCTION ceil 1.5)) (TOK_SELEXPR (TOK_FUNCTION ceil (- 1.5))) (TOK_SELEXPR (TOK_FUNCTION ceiling 1.0)) (TOK_SELEXPR (TOK_FUNCTION rand 3)) (TOK_SELEXPR (+ 3)) (TOK_SELEXPR (- 3)) (TOK_SELEXPR (+ 1 (+ 2))) (TOK_SELEXPR (+ 1 (- 2))) (TOK_SELEXPR (~ 1)) (TOK_SELEXPR (~ (TOK_FUNCTION TOK_TINYINT 1))) (TOK_SELEXPR (~ (TOK_FUNCTION TOK_SMALLINT 1))) (TOK_SELEXPR (~ (TOK_FUNCTION TOK_BIGINT 1))) (TOK_SELEXPR (& (TOK_FUNCTION TOK_TINYINT 1) (TOK_FUNCTION TOK_TINYINT 2))) (TOK_SELEXPR (& (TOK_FUNCTION TOK_SMALLINT 1) (TOK_FUNCTION TOK_SMALLINT 2))) (TOK_SELEXPR (& 1 2)) (TOK_SELEXPR (& (TOK_FUNCTION TOK_BIGINT 1) (TOK_FUNCTION TOK_BIGINT 2))) (TOK_SELEXPR (| (TOK_FUNCTION TOK_TINYINT 1) (TOK_FUNCTION TOK_TINYINT 2))) (TOK_SELEXPR (| (TOK_FUNCTION TOK_SMALLINT 1) (TOK_FUNCTION TOK_SMALLINT 2))) (TOK_SELEXPR (| 1 2)) (TOK_SELEXPR (| (TOK_FUNCTION TOK_BIGINT 1) (TOK_FUNCTION TOK_BIGINT 2))) (TOK_SELEXPR (^ (TOK_FUNCTION TOK_TINYINT 1) (TOK_FUNCTION TOK_TINYINT 3))) (TOK_SELEXPR (^ (TOK_FUNCTION TOK_SMALLINT 1) (TOK_FUNCTION TOK_SMALLINT 3))) (TOK_SELEXPR (^ 1 3)) (TOK_SELEXPR (^ (TOK_FUNCTION TOK_BIGINT 1) (TOK_FUNCTION TOK_BIGINT 3)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -73,84 +70,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - dest1 + Map Operator Tree: TableScan alias: dest1 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: round(1.0) - type: double - expr: round(1.5) - type: double - expr: round((- 1.5)) - type: double - expr: floor(1.0) - type: bigint - expr: floor(1.5) - type: bigint - expr: floor((- 1.5)) - type: bigint - expr: sqrt(1.0) - type: double - expr: sqrt((- 1.0)) - type: double - expr: sqrt(0.0) - type: double - expr: ceil(1.0) - type: bigint - expr: ceil(1.5) - type: bigint - expr: ceil((- 1.5)) - type: bigint - expr: ceil(1.0) - type: bigint - expr: rand(3) - type: double - expr: 3 - type: int - expr: (- 3) - type: int - expr: (1 + 2) - type: int - expr: (1 + (- 2)) - type: int - expr: (~ 1) - type: int - expr: (~ UDFToByte(1)) - type: tinyint - expr: (~ UDFToShort(1)) - type: smallint - expr: (~ UDFToLong(1)) - type: bigint - expr: (UDFToByte(1) & UDFToByte(2)) - type: tinyint - expr: (UDFToShort(1) & UDFToShort(2)) - type: smallint - expr: (1 & 2) - type: int - expr: (UDFToLong(1) & UDFToLong(2)) - type: bigint - expr: (UDFToByte(1) | UDFToByte(2)) - type: tinyint - expr: (UDFToShort(1) | UDFToShort(2)) - type: smallint - expr: (1 | 2) - type: int - expr: (UDFToLong(1) | UDFToLong(2)) - type: bigint - expr: (UDFToByte(1) ^ UDFToByte(3)) - type: tinyint - expr: (UDFToShort(1) ^ UDFToShort(3)) - type: smallint - expr: (1 ^ 3) - type: int - expr: (UDFToLong(1) ^ UDFToLong(3)) - type: bigint + expressions: round(1.0) (type: double), round(1.5) (type: double), round((- 1.5)) (type: double), floor(1.0) (type: bigint), floor(1.5) (type: bigint), floor((- 1.5)) (type: bigint), sqrt(1.0) (type: double), sqrt((- 1.0)) (type: double), sqrt(0.0) (type: double), ceil(1.0) (type: bigint), ceil(1.5) (type: bigint), ceil((- 1.5)) (type: bigint), ceil(1.0) (type: bigint), rand(3) (type: double), 3 (type: int), (- 3) (type: int), (1 + 2) (type: int), (1 + (- 2)) (type: int), (~ 1) (type: int), (~ UDFToByte(1)) (type: tinyint), (~ UDFToShort(1)) (type: smallint), (~ UDFToLong(1)) (type: bigint), (UDFToByte(1) & UDFToByte(2)) (type: tinyint), (UDFToShort(1) & UDFToShort(2)) (type: smallint), (1 & 2) (type: int), (UDFToLong(1) & UDFToLong(2)) (type: bigint), (UDFToByte(1) | UDFToByte(2)) (type: tinyint), (UDFToShort(1) | UDFToShort(2)) (type: smallint), (1 | 2) (type: int), (UDFToLong(1) | UDFToLong(2)) (type: bigint), (UDFToByte(1) ^ UDFToByte(3)) (type: tinyint), (UDFToShort(1) ^ UDFToShort(3)) (type: smallint), (1 ^ 3) (type: int), (UDFToLong(1) ^ UDFToLong(3)) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf5.q.out ql/src/test/results/clientpositive/udf5.q.out index 6223500..1acd4f1 100644 --- ql/src/test/results/clientpositive/udf5.q.out +++ ql/src/test/results/clientpositive/udf5.q.out @@ -19,9 +19,6 @@ POSTHOOK: query: EXPLAIN SELECT from_unixtime(1226446340), to_date(from_unixtime(1226446340)), day('2008-11-01'), month('2008-11-01'), year('2008-11-01'), day('2008-11-01 15:32:20'), month('2008-11-01 15:32:20'), year('2008-11-01 15:32:20') FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.c1 SIMPLE [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION from_unixtime 1226446340)) (TOK_SELEXPR (TOK_FUNCTION to_date (TOK_FUNCTION from_unixtime 1226446340))) (TOK_SELEXPR (TOK_FUNCTION day '2008-11-01')) (TOK_SELEXPR (TOK_FUNCTION month '2008-11-01')) (TOK_SELEXPR (TOK_FUNCTION year '2008-11-01')) (TOK_SELEXPR (TOK_FUNCTION day '2008-11-01 15:32:20')) (TOK_SELEXPR (TOK_FUNCTION month '2008-11-01 15:32:20')) (TOK_SELEXPR (TOK_FUNCTION year '2008-11-01 15:32:20'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -29,32 +26,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - dest1 + Map Operator Tree: TableScan alias: dest1 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: from_unixtime(1226446340) - type: string - expr: to_date(from_unixtime(1226446340)) - type: string - expr: day('2008-11-01') - type: int - expr: month('2008-11-01') - type: int - expr: year('2008-11-01') - type: int - expr: day('2008-11-01 15:32:20') - type: int - expr: month('2008-11-01 15:32:20') - type: int - expr: year('2008-11-01 15:32:20') - type: int + expressions: from_unixtime(1226446340) (type: string), to_date(from_unixtime(1226446340)) (type: string), day('2008-11-01') (type: int), month('2008-11-01') (type: int), year('2008-11-01') (type: int), day('2008-11-01 15:32:20') (type: int), month('2008-11-01 15:32:20') (type: int), year('2008-11-01 15:32:20') (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 221 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 221 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -81,9 +63,6 @@ POSTHOOK: query: EXPLAIN SELECT from_unixtime(unix_timestamp('2010-01-13 11:57:40', 'yyyy-MM-dd HH:mm:ss'), 'MM/dd/yy HH:mm:ss'), from_unixtime(unix_timestamp('2010-01-13 11:57:40')) from dest1 POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.c1 SIMPLE [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION from_unixtime (TOK_FUNCTION unix_timestamp '2010-01-13 11:57:40' 'yyyy-MM-dd HH:mm:ss') 'MM/dd/yy HH:mm:ss')) (TOK_SELEXPR (TOK_FUNCTION from_unixtime (TOK_FUNCTION unix_timestamp '2010-01-13 11:57:40')))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -91,20 +70,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - dest1 + Map Operator Tree: TableScan alias: dest1 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: from_unixtime(unix_timestamp('2010-01-13 11:57:40','yyyy-MM-dd HH:mm:ss'), 'MM/dd/yy HH:mm:ss') - type: string - expr: from_unixtime(unix_timestamp('2010-01-13 11:57:40')) - type: string + expressions: from_unixtime(unix_timestamp('2010-01-13 11:57:40','yyyy-MM-dd HH:mm:ss'), 'MM/dd/yy HH:mm:ss') (type: string), from_unixtime(unix_timestamp('2010-01-13 11:57:40')) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf6.q.out ql/src/test/results/clientpositive/udf6.q.out index 8cd2f24..4cf11d0 100644 --- ql/src/test/results/clientpositive/udf6.q.out +++ ql/src/test/results/clientpositive/udf6.q.out @@ -19,9 +19,6 @@ POSTHOOK: query: EXPLAIN SELECT IF(TRUE, 1, 2) FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.c1 SIMPLE [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION IF TRUE 1 2))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -29,18 +26,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - dest1 + Map Operator Tree: TableScan alias: dest1 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: if(true, 1, 2) - type: int + expressions: if(true, 1, 2) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -77,9 +73,6 @@ SELECT IF(TRUE, 1, 2), IF(FALSE, 1, 2), IF(NULL, 1, 2), IF(TRUE, "a", "b"), CAST('128' AS STRING) FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.c1 SIMPLE [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION IF TRUE 1 2)) (TOK_SELEXPR (TOK_FUNCTION IF FALSE 1 2)) (TOK_SELEXPR (TOK_FUNCTION IF TOK_NULL 1 2)) (TOK_SELEXPR (TOK_FUNCTION IF TRUE "a" "b")) (TOK_SELEXPR (TOK_FUNCTION IF TRUE 0.1 0.2)) (TOK_SELEXPR (TOK_FUNCTION IF FALSE (TOK_FUNCTION TOK_BIGINT 1) (TOK_FUNCTION TOK_BIGINT 2))) (TOK_SELEXPR (TOK_FUNCTION IF FALSE (TOK_FUNCTION TOK_TINYINT 127) (TOK_FUNCTION TOK_TINYINT 126))) (TOK_SELEXPR (TOK_FUNCTION IF FALSE (TOK_FUNCTION TOK_SMALLINT 127) (TOK_FUNCTION TOK_SMALLINT 128))) (TOK_SELEXPR (TOK_FUNCTION TOK_INT 128)) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE 1.0)) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING '128'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -87,38 +80,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - dest1 + Map Operator Tree: TableScan alias: dest1 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: if(true, 1, 2) - type: int - expr: if(false, 1, 2) - type: int - expr: if(null, 1, 2) - type: int - expr: if(true, 'a', 'b') - type: string - expr: if(true, 0.1, 0.2) - type: double - expr: if(false, UDFToLong(1), UDFToLong(2)) - type: bigint - expr: if(false, UDFToByte(127), UDFToByte(126)) - type: tinyint - expr: if(false, UDFToShort(127), UDFToShort(128)) - type: smallint - expr: 128 - type: int - expr: 1.0 - type: double - expr: '128' - type: string + expressions: if(true, 1, 2) (type: int), if(false, 1, 2) (type: int), if(null, 1, 2) (type: int), if(true, 'a', 'b') (type: string), if(true, 0.1, 0.2) (type: double), if(false, UDFToLong(1), UDFToLong(2)) (type: bigint), if(false, UDFToByte(127), UDFToByte(126)) (type: tinyint), if(false, UDFToShort(127), UDFToShort(128)) (type: smallint), 128 (type: int), 1.0 (type: double), '128' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf7.q.out ql/src/test/results/clientpositive/udf7.q.out index 77ccd8f..f58aed9 100644 --- ql/src/test/results/clientpositive/udf7.q.out +++ ql/src/test/results/clientpositive/udf7.q.out @@ -33,9 +33,6 @@ SELECT ROUND(LN(3.0),12), LN(0.0), LN(-1), ROUND(LOG(3.0),12), LOG(0.0), POW(CAST (2 AS DECIMAL), CAST(3 AS INT)) FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.c1 SIMPLE [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION ROUND (TOK_FUNCTION LN 3.0) 12)) (TOK_SELEXPR (TOK_FUNCTION LN 0.0)) (TOK_SELEXPR (TOK_FUNCTION LN (- 1))) (TOK_SELEXPR (TOK_FUNCTION ROUND (TOK_FUNCTION LOG 3.0) 12)) (TOK_SELEXPR (TOK_FUNCTION LOG 0.0)) (TOK_SELEXPR (TOK_FUNCTION LOG (- 1))) (TOK_SELEXPR (TOK_FUNCTION ROUND (TOK_FUNCTION LOG2 3.0) 12)) (TOK_SELEXPR (TOK_FUNCTION LOG2 0.0)) (TOK_SELEXPR (TOK_FUNCTION LOG2 (- 1))) (TOK_SELEXPR (TOK_FUNCTION ROUND (TOK_FUNCTION LOG10 3.0) 12)) (TOK_SELEXPR (TOK_FUNCTION LOG10 0.0)) (TOK_SELEXPR (TOK_FUNCTION LOG10 (- 1))) (TOK_SELEXPR (TOK_FUNCTION ROUND (TOK_FUNCTION LOG 2 3.0) 12)) (TOK_SELEXPR (TOK_FUNCTION LOG 2 0.0)) (TOK_SELEXPR (TOK_FUNCTION LOG 2 (- 1))) (TOK_SELEXPR (TOK_FUNCTION LOG 0.5 2)) (TOK_SELEXPR (TOK_FUNCTION LOG 2 0.5)) (TOK_SELEXPR (TOK_FUNCTION ROUND (TOK_FUNCTION EXP 2.0) 12)) (TOK_SELEXPR (TOK_FUNCTION POW 2 3)) (TOK_SELEXPR (TOK_FUNCTION POWER 2 3)) (TOK_SELEXPR (TOK_FUNCTION POWER 2 (- 3))) (TOK_SELEXPR (TOK_FUNCTION POWER 0.5 (- 3))) (TOK_SELEXPR (TOK_FUNCTION POWER 4 0.5)) (TOK_SELEXPR (TOK_FUNCTION POWER (- 1) 0.5)) (TOK_SELEXPR (TOK_FUNCTION POWER (- 1) 2)) (TOK_SELEXPR (TOK_FUNCTION POWER (TOK_FUNCTION TOK_DECIMAL 1) (TOK_FUNCTION TOK_INT 0))) (TOK_SELEXPR (TOK_FUNCTION POWER (TOK_FUNCTION TOK_DECIMAL 2) (TOK_FUNCTION TOK_INT 3))) (TOK_SELEXPR (TOK_FUNCTION POW (TOK_FUNCTION TOK_DECIMAL 2) (TOK_FUNCTION TOK_INT 3)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -43,72 +40,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - dest1 + Map Operator Tree: TableScan alias: dest1 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: round(ln(3.0), 12) - type: double - expr: ln(0.0) - type: double - expr: ln((- 1)) - type: double - expr: round(log(3.0), 12) - type: double - expr: log(0.0) - type: double - expr: log((- 1)) - type: double - expr: round(log2(3.0), 12) - type: double - expr: log2(0.0) - type: double - expr: log2((- 1)) - type: double - expr: round(log10(3.0), 12) - type: double - expr: log10(0.0) - type: double - expr: log10((- 1)) - type: double - expr: round(log(2, 3.0), 12) - type: double - expr: log(2, 0.0) - type: double - expr: log(2, (- 1)) - type: double - expr: log(0.5, 2) - type: double - expr: log(2, 0.5) - type: double - expr: round(exp(2.0), 12) - type: double - expr: power(2, 3) - type: double - expr: power(2, 3) - type: double - expr: power(2, (- 3)) - type: double - expr: power(0.5, (- 3)) - type: double - expr: power(4, 0.5) - type: double - expr: power((- 1), 0.5) - type: double - expr: power((- 1), 2) - type: double - expr: power(CAST( 1 AS decimal(10,0)), 0) - type: double - expr: power(CAST( 2 AS decimal(10,0)), 3) - type: double - expr: power(CAST( 2 AS decimal(10,0)), 3) - type: double + expressions: round(ln(3.0), 12) (type: double), ln(0.0) (type: double), ln((- 1)) (type: double), round(log(3.0), 12) (type: double), log(0.0) (type: double), log((- 1)) (type: double), round(log2(3.0), 12) (type: double), log2(0.0) (type: double), log2((- 1)) (type: double), round(log10(3.0), 12) (type: double), log10(0.0) (type: double), log10((- 1)) (type: double), round(log(2, 3.0), 12) (type: double), log(2, 0.0) (type: double), log(2, (- 1)) (type: double), log(0.5, 2) (type: double), log(2, 0.5) (type: double), round(exp(2.0), 12) (type: double), power(2, 3) (type: double), power(2, 3) (type: double), power(2, (- 3)) (type: double), power(0.5, (- 3)) (type: double), power(4, 0.5) (type: double), power((- 1), 0.5) (type: double), power((- 1), 2) (type: double), power(CAST( 1 AS decimal(10,0)), 0) (type: double), power(CAST( 2 AS decimal(10,0)), 3) (type: double), power(CAST( 2 AS decimal(10,0)), 3) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf8.q.out ql/src/test/results/clientpositive/udf8.q.out index 7189764..79c3bff 100644 --- ql/src/test/results/clientpositive/udf8.q.out +++ ql/src/test/results/clientpositive/udf8.q.out @@ -30,9 +30,6 @@ SELECT avg(c1), sum(c1), count(c1) FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.c1 SIMPLE [] POSTHOOK: Lineage: dest1.c1 SIMPLE [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL c1))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL c1))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL c1)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -40,54 +37,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - dest1 + Map Operator Tree: TableScan alias: dest1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: c1 - type: string + expressions: c1 (type: string) outputColumnNames: c1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: avg(c1) - expr: sum(c1) - expr: count(c1) - bucketGroup: false + aggregations: avg(c1), sum(c1), count(c1) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct - expr: _col1 - type: double - expr: _col2 - type: bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: avg(VALUE._col0) - expr: sum(VALUE._col1) - expr: count(VALUE._col2) - bucketGroup: false + aggregations: avg(VALUE._col0), sum(VALUE._col1), count(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: double - expr: _col2 - type: bigint + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf9.q.out ql/src/test/results/clientpositive/udf9.q.out index 0257e4a..6de9864 100644 --- ql/src/test/results/clientpositive/udf9.q.out +++ ql/src/test/results/clientpositive/udf9.q.out @@ -20,9 +20,6 @@ SELECT DATEDIFF('2008-12-31', '2009-01-01'), DATEDIFF('2008-03-01', '2008-02-28' DATE_SUB('2007-02-28', 365), DATE_SUB('2007-02-28 01:12:34', 730) FROM src WHERE src.key = 86 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION DATEDIFF '2008-12-31' '2009-01-01')) (TOK_SELEXPR (TOK_FUNCTION DATEDIFF '2008-03-01' '2008-02-28')) (TOK_SELEXPR (TOK_FUNCTION DATEDIFF '2007-03-01' '2007-01-28')) (TOK_SELEXPR (TOK_FUNCTION DATEDIFF '2008-03-01 23:59:59' '2008-03-02 00:00:00')) (TOK_SELEXPR (TOK_FUNCTION DATE_ADD '2008-12-31' 1)) (TOK_SELEXPR (TOK_FUNCTION DATE_ADD '2008-12-31' 365)) (TOK_SELEXPR (TOK_FUNCTION DATE_ADD '2008-02-28' 2)) (TOK_SELEXPR (TOK_FUNCTION DATE_ADD '2009-02-28' 2)) (TOK_SELEXPR (TOK_FUNCTION DATE_ADD '2007-02-28' 365)) (TOK_SELEXPR (TOK_FUNCTION DATE_ADD '2007-02-28 23:59:59' 730)) (TOK_SELEXPR (TOK_FUNCTION DATE_SUB '2009-01-01' 1)) (TOK_SELEXPR (TOK_FUNCTION DATE_SUB '2009-01-01' 365)) (TOK_SELEXPR (TOK_FUNCTION DATE_SUB '2008-02-28' 2)) (TOK_SELEXPR (TOK_FUNCTION DATE_SUB '2009-02-28' 2)) (TOK_SELEXPR (TOK_FUNCTION DATE_SUB '2007-02-28' 365)) (TOK_SELEXPR (TOK_FUNCTION DATE_SUB '2007-02-28 01:12:34' 730))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) key) 86)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -30,52 +27,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: datediff('2008-12-31', '2009-01-01') - type: int - expr: datediff('2008-03-01', '2008-02-28') - type: int - expr: datediff('2007-03-01', '2007-01-28') - type: int - expr: datediff('2008-03-01 23:59:59', '2008-03-02 00:00:00') - type: int - expr: date_add('2008-12-31', 1) - type: string - expr: date_add('2008-12-31', 365) - type: string - expr: date_add('2008-02-28', 2) - type: string - expr: date_add('2009-02-28', 2) - type: string - expr: date_add('2007-02-28', 365) - type: string - expr: date_add('2007-02-28 23:59:59', 730) - type: string - expr: date_sub('2009-01-01', 1) - type: string - expr: date_sub('2009-01-01', 365) - type: string - expr: date_sub('2008-02-28', 2) - type: string - expr: date_sub('2009-02-28', 2) - type: string - expr: date_sub('2007-02-28', 365) - type: string - expr: date_sub('2007-02-28 01:12:34', 730) - type: string + expressions: datediff('2008-12-31', '2009-01-01') (type: int), datediff('2008-03-01', '2008-02-28') (type: int), datediff('2007-03-01', '2007-01-28') (type: int), datediff('2008-03-01 23:59:59', '2008-03-02 00:00:00') (type: int), date_add('2008-12-31', 1) (type: string), date_add('2008-12-31', 365) (type: string), date_add('2008-02-28', 2) (type: string), date_add('2009-02-28', 2) (type: string), date_add('2007-02-28', 365) (type: string), date_add('2007-02-28 23:59:59', 730) (type: string), date_sub('2009-01-01', 1) (type: string), date_sub('2009-01-01', 365) (type: string), date_sub('2008-02-28', 2) (type: string), date_sub('2009-02-28', 2) (type: string), date_sub('2007-02-28', 365) (type: string), date_sub('2007-02-28 01:12:34', 730) (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf_10_trims.q.out ql/src/test/results/clientpositive/udf_10_trims.q.out index 47051c8..04d0d4f 100644 --- ql/src/test/results/clientpositive/udf_10_trims.q.out +++ ql/src/test/results/clientpositive/udf_10_trims.q.out @@ -15,9 +15,6 @@ SELECT trim(trim(trim(trim(trim(trim(trim(trim(trim(trim( ' abc ')))))))))) FROM src WHERE src.key = 86 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION trim (TOK_FUNCTION trim (TOK_FUNCTION trim (TOK_FUNCTION trim (TOK_FUNCTION trim (TOK_FUNCTION trim (TOK_FUNCTION trim (TOK_FUNCTION trim (TOK_FUNCTION trim (TOK_FUNCTION trim ' abc ')))))))))))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) key) 86)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -31,22 +28,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: trim(trim(trim(trim(trim(trim(trim(trim(trim(trim(' abc ')))))))))) - type: string + expressions: trim(trim(trim(trim(trim(trim(trim(trim(trim(trim(' abc ')))))))))) (type: string) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -77,12 +72,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -91,12 +84,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf_E.q.out ql/src/test/results/clientpositive/udf_E.q.out index 96584ee..a859718 100644 --- ql/src/test/results/clientpositive/udf_E.q.out +++ ql/src/test/results/clientpositive/udf_E.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select E() FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION E))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -18,11 +15,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: e() - type: double + expressions: e() (type: double) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: select E() FROM src tablesample (1 rows) @@ -54,9 +51,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select E() FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION E))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -68,11 +62,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: e() - type: double + expressions: e() (type: double) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: select E() FROM src tablesample (1 rows) diff --git ql/src/test/results/clientpositive/udf_PI.q.out ql/src/test/results/clientpositive/udf_PI.q.out index ba778f5..f42850d 100644 --- ql/src/test/results/clientpositive/udf_PI.q.out +++ ql/src/test/results/clientpositive/udf_PI.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select PI() FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION PI))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -18,11 +15,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: pi() - type: double + expressions: pi() (type: double) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: select PI() FROM src tablesample (1 rows) @@ -54,9 +51,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select PI() FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION PI))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -68,11 +62,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: pi() - type: double + expressions: pi() (type: double) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: select PI() FROM src tablesample (1 rows) diff --git ql/src/test/results/clientpositive/udf_abs.q.out ql/src/test/results/clientpositive/udf_abs.q.out index d334583..ea68381 100644 --- ql/src/test/results/clientpositive/udf_abs.q.out +++ ql/src/test/results/clientpositive/udf_abs.q.out @@ -29,9 +29,6 @@ POSTHOOK: query: EXPLAIN SELECT abs(9223372036854775807) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION abs 0)) (TOK_SELEXPR (TOK_FUNCTION abs (- 1))) (TOK_SELEXPR (TOK_FUNCTION abs 123)) (TOK_SELEXPR (TOK_FUNCTION abs (- 9223372036854775807))) (TOK_SELEXPR (TOK_FUNCTION abs 9223372036854775807))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -43,19 +40,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: abs(0) - type: int - expr: abs((- 1)) - type: int - expr: abs(123) - type: int - expr: abs((- 9223372036854775807)) - type: bigint - expr: abs(9223372036854775807) - type: bigint + expressions: abs(0) (type: int), abs((- 1)) (type: int), abs(123) (type: int), abs((- 9223372036854775807)) (type: bigint), abs(9223372036854775807) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT @@ -91,9 +80,6 @@ POSTHOOK: query: EXPLAIN SELECT abs(3.14159265) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION abs 0.0)) (TOK_SELEXPR (TOK_FUNCTION abs (- 3.14159265))) (TOK_SELEXPR (TOK_FUNCTION abs 3.14159265))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -105,15 +91,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: abs(0.0) - type: double - expr: abs((- 3.14159265)) - type: double - expr: abs(3.14159265) - type: double + expressions: abs(0.0) (type: double), abs((- 3.14159265)) (type: double), abs(3.14159265) (type: double) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT diff --git ql/src/test/results/clientpositive/udf_array.q.out ql/src/test/results/clientpositive/udf_array.q.out index 1ea18ee..95a64a5 100644 --- ql/src/test/results/clientpositive/udf_array.q.out +++ ql/src/test/results/clientpositive/udf_array.q.out @@ -14,9 +14,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT array(), array()[1], array(1, 2, 3), array(1, 2, 3)[2], array(1,"a", 2, 3), array(1,"a", 2, 3)[2], array(array(1), array(2), array(3), array(4))[1][0] FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION array)) (TOK_SELEXPR ([ (TOK_FUNCTION array) 1)) (TOK_SELEXPR (TOK_FUNCTION array 1 2 3)) (TOK_SELEXPR ([ (TOK_FUNCTION array 1 2 3) 2)) (TOK_SELEXPR (TOK_FUNCTION array 1 "a" 2 3)) (TOK_SELEXPR ([ (TOK_FUNCTION array 1 "a" 2 3) 2)) (TOK_SELEXPR ([ ([ (TOK_FUNCTION array (TOK_FUNCTION array 1) (TOK_FUNCTION array 2) (TOK_FUNCTION array 3) (TOK_FUNCTION array 4)) 1) 0))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -28,23 +25,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: array() - type: array - expr: array()[1] - type: string - expr: array(1,2,3) - type: array - expr: array(1,2,3)[2] - type: int - expr: array(1,'a',2,3) - type: array - expr: array(1,'a',2,3)[2] - type: string - expr: array(array(1),array(2),array(3),array(4))[1][0] - type: int + expressions: array() (type: array), array()[1] (type: string), array(1,2,3) (type: array), array(1,2,3)[2] (type: int), array(1,'a',2,3) (type: array), array(1,'a',2,3)[2] (type: string), array(array(1),array(2),array(3),array(4))[1][0] (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT array(), array()[1], array(1, 2, 3), array(1, 2, 3)[2], array(1,"a", 2, 3), array(1,"a", 2, 3)[2], diff --git ql/src/test/results/clientpositive/udf_ascii.q.out ql/src/test/results/clientpositive/udf_ascii.q.out index edde74b..185a0cd 100644 --- ql/src/test/results/clientpositive/udf_ascii.q.out +++ ql/src/test/results/clientpositive/udf_ascii.q.out @@ -25,9 +25,6 @@ POSTHOOK: query: EXPLAIN SELECT ascii('!') FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION ascii 'Facebook')) (TOK_SELEXPR (TOK_FUNCTION ascii '')) (TOK_SELEXPR (TOK_FUNCTION ascii '!'))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -39,15 +36,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: ascii('Facebook') - type: int - expr: ascii('') - type: int - expr: ascii('!') - type: int + expressions: ascii('Facebook') (type: int), ascii('') (type: int), ascii('!') (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT diff --git ql/src/test/results/clientpositive/udf_between.q.out ql/src/test/results/clientpositive/udf_between.q.out index 0c9cc85..129f3cc 100644 --- ql/src/test/results/clientpositive/udf_between.q.out +++ ql/src/test/results/clientpositive/udf_between.q.out @@ -12,9 +12,6 @@ PREHOOK: query: explain SELECT * FROM src where key + 100 between (150 + -50) AN PREHOOK: type: QUERY POSTHOOK: query: explain SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION between KW_FALSE (+ (TOK_TABLE_OR_COL key) 100) (+ 150 (- 50)) (+ 150 50))) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -25,18 +22,17 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key + 100) BETWEEN (150 + (- 50)) AND (150 + 50) - type: boolean + predicate: (key + 100) BETWEEN (150 + (- 50)) AND (150 + 50) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT 20 @@ -71,9 +67,6 @@ PREHOOK: query: explain SELECT * FROM src where key + 100 not between (150 + -50 PREHOOK: type: QUERY POSTHOOK: query: explain SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50) LIMIT 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION between KW_TRUE (+ (TOK_TABLE_OR_COL key) 100) (+ 150 (- 50)) (+ 150 50))) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -84,18 +77,17 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key + 100) NOT BETWEEN (150 + (- 50)) AND (150 + 50) - type: boolean + predicate: (key + 100) NOT BETWEEN (150 + (- 50)) AND (150 + 50) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50) LIMIT 20 @@ -130,9 +122,6 @@ PREHOOK: query: explain SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1 PREHOOK: type: QUERY POSTHOOK: query: explain SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION between KW_FALSE 'b' 'a' 'c')) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -143,18 +132,17 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: 'b' BETWEEN 'a' AND 'c' - type: boolean + predicate: 'b' BETWEEN 'a' AND 'c' (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1 @@ -170,9 +158,6 @@ PREHOOK: query: explain SELECT * FROM src where 2 between 2 AND '3' LIMIT 1 PREHOOK: type: QUERY POSTHOOK: query: explain SELECT * FROM src where 2 between 2 AND '3' LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION between KW_FALSE 2 2 '3')) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -183,18 +168,17 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: 2 BETWEEN 2 AND '3' - type: boolean + predicate: 2 BETWEEN 2 AND '3' (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT * FROM src where 2 between 2 AND '3' LIMIT 1 diff --git ql/src/test/results/clientpositive/udf_case.q.out ql/src/test/results/clientpositive/udf_case.q.out index 5835278..fe11fce 100644 --- ql/src/test/results/clientpositive/udf_case.q.out +++ ql/src/test/results/clientpositive/udf_case.q.out @@ -64,9 +64,6 @@ SELECT CASE 1 END FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION CASE 1 1 2 3 4 5)) (TOK_SELEXPR (TOK_FUNCTION CASE 2 1 2 5)) (TOK_SELEXPR (TOK_FUNCTION CASE 14 12 13 14 15)) (TOK_SELEXPR (TOK_FUNCTION CASE 16 12 13 14 15)) (TOK_SELEXPR (TOK_FUNCTION CASE 17 18 TOK_NULL 17 20)) (TOK_SELEXPR (TOK_FUNCTION CASE 21 22 23 21 24))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -78,21 +75,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: CASE (1) WHEN (1) THEN (2) WHEN (3) THEN (4) ELSE (5) END - type: int - expr: CASE (2) WHEN (1) THEN (2) ELSE (5) END - type: int - expr: CASE (14) WHEN (12) THEN (13) WHEN (14) THEN (15) END - type: int - expr: CASE (16) WHEN (12) THEN (13) WHEN (14) THEN (15) END - type: int - expr: CASE (17) WHEN (18) THEN (null) WHEN (17) THEN (20) END - type: int - expr: CASE (21) WHEN (22) THEN (23) WHEN (21) THEN (24) END - type: int + expressions: CASE (1) WHEN (1) THEN (2) WHEN (3) THEN (4) ELSE (5) END (type: int), CASE (2) WHEN (1) THEN (2) ELSE (5) END (type: int), CASE (14) WHEN (12) THEN (13) WHEN (14) THEN (15) END (type: int), CASE (16) WHEN (12) THEN (13) WHEN (14) THEN (15) END (type: int), CASE (17) WHEN (18) THEN (null) WHEN (17) THEN (20) END (type: int), CASE (21) WHEN (22) THEN (23) WHEN (21) THEN (24) END (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT CASE 1 diff --git ql/src/test/results/clientpositive/udf_case_column_pruning.q.out ql/src/test/results/clientpositive/udf_case_column_pruning.q.out index db93b9c..0e2d6fa 100644 --- ql/src/test/results/clientpositive/udf_case_column_pruning.q.out +++ ql/src/test/results/clientpositive/udf_case_column_pruning.q.out @@ -18,9 +18,6 @@ FROM src a JOIN src b ON a.key = b.key ORDER BY key LIMIT 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION CASE (. (TOK_TABLE_OR_COL a) key) '1' 2 '3' 4 5) key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -29,34 +26,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: a + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -64,16 +51,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: CASE (_col0) WHEN ('1') THEN (2) WHEN ('3') THEN (4) ELSE (5) END - type: int + expressions: CASE (_col0) WHEN ('1') THEN (2) WHEN ('3') THEN (4) ELSE (5) END (type: int) outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -81,24 +66,22 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1010 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 1010 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf_case_thrift.q.out ql/src/test/results/clientpositive/udf_case_thrift.q.out index 3c19ec8..4b579d3 100644 --- ql/src/test/results/clientpositive/udf_case_thrift.q.out +++ ql/src/test/results/clientpositive/udf_case_thrift.q.out @@ -34,9 +34,6 @@ SELECT CASE src_thrift.lint[0] END)[0] FROM src_thrift tablesample (3 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION CASE ([ (. (TOK_TABLE_OR_COL src_thrift) lint) 0) 0 (+ ([ (. (TOK_TABLE_OR_COL src_thrift) lint) 0) 1) 1 (+ ([ (. (TOK_TABLE_OR_COL src_thrift) lint) 0) 2) 2 100 5)) (TOK_SELEXPR (TOK_FUNCTION CASE ([ (. (TOK_TABLE_OR_COL src_thrift) lstring) 0) '0' 'zero' '10' (TOK_FUNCTION CONCAT ([ (. (TOK_TABLE_OR_COL src_thrift) lstring) 0) " is ten") 'default')) (TOK_SELEXPR ([ (TOK_FUNCTION CASE ([ (. (TOK_TABLE_OR_COL src_thrift) lstring) 0) '0' (. (TOK_TABLE_OR_COL src_thrift) lstring) TOK_NULL) 0))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -48,15 +45,11 @@ STAGE PLANS: TableScan alias: src_thrift Row Limit Per Split: 3 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: CASE (lint[0]) WHEN (0) THEN ((lint[0] + 1)) WHEN (1) THEN ((lint[0] + 2)) WHEN (2) THEN (100) ELSE (5) END - type: int - expr: CASE (lstring[0]) WHEN ('0') THEN ('zero') WHEN ('10') THEN (concat(lstring[0], ' is ten')) ELSE ('default') END - type: string - expr: CASE (lstring[0]) WHEN ('0') THEN (lstring) ELSE (null) END[0] - type: string + expressions: CASE (lint[0]) WHEN (0) THEN ((lint[0] + 1)) WHEN (1) THEN ((lint[0] + 2)) WHEN (2) THEN (100) ELSE (5) END (type: int), CASE (lstring[0]) WHEN ('0') THEN ('zero') WHEN ('10') THEN (concat(lstring[0], ' is ten')) ELSE ('default') END (type: string), CASE (lstring[0]) WHEN ('0') THEN (lstring) ELSE (null) END[0] (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE ListSink PREHOOK: query: SELECT CASE src_thrift.lint[0] diff --git ql/src/test/results/clientpositive/udf_coalesce.q.out ql/src/test/results/clientpositive/udf_coalesce.q.out index 96b0199..78d0ab9 100644 --- ql/src/test/results/clientpositive/udf_coalesce.q.out +++ ql/src/test/results/clientpositive/udf_coalesce.q.out @@ -53,9 +53,6 @@ SELECT COALESCE(1), COALESCE(IF(TRUE, NULL, 0), NULL) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COALESCE 1)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 1 2)) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL 2)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 1 TOK_NULL)) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL TOK_NULL 3)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 4 TOK_NULL TOK_NULL TOK_NULL)) (TOK_SELEXPR (TOK_FUNCTION COALESCE '1')) (TOK_SELEXPR (TOK_FUNCTION COALESCE '1' '2')) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL '2')) (TOK_SELEXPR (TOK_FUNCTION COALESCE '1' TOK_NULL)) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL TOK_NULL '3')) (TOK_SELEXPR (TOK_FUNCTION COALESCE '4' TOK_NULL TOK_NULL TOK_NULL)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 1.0)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 1.0 2.0)) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL 2.0)) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL 2.0 3.0)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 2.0 TOK_NULL 3.0)) (TOK_SELEXPR (TOK_FUNCTION COALESCE (TOK_FUNCTION IF TRUE TOK_NULL 0) TOK_NULL))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -67,45 +64,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: COALESCE(1) - type: int - expr: COALESCE(1,2) - type: int - expr: COALESCE(null,2) - type: int - expr: COALESCE(1,null) - type: int - expr: COALESCE(null,null,3) - type: int - expr: COALESCE(4,null,null,null) - type: int - expr: COALESCE('1') - type: string - expr: COALESCE('1','2') - type: string - expr: COALESCE(null,'2') - type: string - expr: COALESCE('1',null) - type: string - expr: COALESCE(null,null,'3') - type: string - expr: COALESCE('4',null,null,null) - type: string - expr: COALESCE(1.0) - type: double - expr: COALESCE(1.0,2.0) - type: double - expr: COALESCE(null,2.0) - type: double - expr: COALESCE(null,2.0,3.0) - type: double - expr: COALESCE(2.0,null,3.0) - type: double - expr: COALESCE(if(true, null, 0),null) - type: int + expressions: COALESCE(1) (type: int), COALESCE(1,2) (type: int), COALESCE(null,2) (type: int), COALESCE(1,null) (type: int), COALESCE(null,null,3) (type: int), COALESCE(4,null,null,null) (type: int), COALESCE('1') (type: string), COALESCE('1','2') (type: string), COALESCE(null,'2') (type: string), COALESCE('1',null) (type: string), COALESCE(null,null,'3') (type: string), COALESCE('4',null,null,null) (type: string), COALESCE(1.0) (type: double), COALESCE(1.0,2.0) (type: double), COALESCE(null,2.0) (type: double), COALESCE(null,2.0,3.0) (type: double), COALESCE(2.0,null,3.0) (type: double), COALESCE(if(true, null, 0),null) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT COALESCE(1), @@ -165,9 +128,6 @@ SELECT COALESCE(src_thrift.lint[1], 999), COALESCE(src_thrift.mstringstring['key_2'], '999') FROM src_thrift POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COALESCE ([ (. (TOK_TABLE_OR_COL src_thrift) lint) 1) 999)) (TOK_SELEXPR (TOK_FUNCTION COALESCE (. ([ (. (TOK_TABLE_OR_COL src_thrift) lintstring) 0) mystring) '999')) (TOK_SELEXPR (TOK_FUNCTION COALESCE ([ (. (TOK_TABLE_OR_COL src_thrift) mstringstring) 'key_2') '999'))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -178,15 +138,11 @@ STAGE PLANS: Processor Tree: TableScan alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: COALESCE(lint[1],999) - type: int - expr: COALESCE(lintstring[0].mystring,'999') - type: string - expr: COALESCE(mstringstring['key_2'],'999') - type: string + expressions: COALESCE(lint[1],999) (type: int), COALESCE(lintstring[0].mystring,'999') (type: string), COALESCE(mstringstring['key_2'],'999') (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE ListSink PREHOOK: query: SELECT COALESCE(src_thrift.lint[1], 999), diff --git ql/src/test/results/clientpositive/udf_compare_java_string.q.out ql/src/test/results/clientpositive/udf_compare_java_string.q.out index 2205889..8e6e365 100644 --- ql/src/test/results/clientpositive/udf_compare_java_string.q.out +++ ql/src/test/results/clientpositive/udf_compare_java_string.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATEFUNCTION POSTHOOK: query: EXPLAIN CREATE TEMPORARY FUNCTION test_udf_get_java_string AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFTestGetJavaString' POSTHOOK: type: CREATEFUNCTION -ABSTRACT SYNTAX TREE: - (TOK_CREATEFUNCTION test_udf_get_java_string 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFTestGetJavaString') - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/udf_concat_ws.q.out ql/src/test/results/clientpositive/udf_concat_ws.q.out index afc1009..adc6aaf 100644 --- ql/src/test/results/clientpositive/udf_concat_ws.q.out +++ ql/src/test/results/clientpositive/udf_concat_ws.q.out @@ -42,9 +42,6 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.c1 SIMPLE [] POSTHOOK: Lineage: dest1.c2 SIMPLE [] POSTHOOK: Lineage: dest1.c3 SIMPLE [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION concat_ws (. (TOK_TABLE_OR_COL dest1) c1) (. (TOK_TABLE_OR_COL dest1) c2) (. (TOK_TABLE_OR_COL dest1) c3))) (TOK_SELEXPR (TOK_FUNCTION concat_ws ',' (. (TOK_TABLE_OR_COL dest1) c1) (. (TOK_TABLE_OR_COL dest1) c2) (. (TOK_TABLE_OR_COL dest1) c3))) (TOK_SELEXPR (TOK_FUNCTION concat_ws TOK_NULL (. (TOK_TABLE_OR_COL dest1) c1) (. (TOK_TABLE_OR_COL dest1) c2) (. (TOK_TABLE_OR_COL dest1) c3))) (TOK_SELEXPR (TOK_FUNCTION concat_ws '**' (. (TOK_TABLE_OR_COL dest1) c1) TOK_NULL (. (TOK_TABLE_OR_COL dest1) c3)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -55,17 +52,11 @@ STAGE PLANS: Processor Tree: TableScan alias: dest1 + Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: concat_ws(c1, c2, c3) - type: string - expr: concat_ws(',', c1, c2, c3) - type: string - expr: concat_ws(null, c1, c2, c3) - type: string - expr: concat_ws('**', c1, null, c3) - type: string + expressions: concat_ws(c1, c2, c3) (type: string), concat_ws(',', c1, c2, c3) (type: string), concat_ws(null, c1, c2, c3) (type: string), concat_ws('**', c1, null, c3) (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT concat_ws(dest1.c1, dest1.c2, dest1.c3), @@ -109,9 +100,6 @@ POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.c1 SIMPLE [] POSTHOOK: Lineage: dest1.c2 SIMPLE [] POSTHOOK: Lineage: dest1.c3 SIMPLE [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION concat_ws '.' (TOK_FUNCTION array 'www' 'face' 'book' 'com') '1234')) (TOK_SELEXPR (TOK_FUNCTION concat_ws '-' 'www' (TOK_FUNCTION array 'face' 'book' 'com') '1234')) (TOK_SELEXPR (TOK_FUNCTION concat_ws 'F' 'www' (TOK_FUNCTION array 'face' 'book' 'com' '1234'))) (TOK_SELEXPR (TOK_FUNCTION concat_ws '_' (TOK_FUNCTION array 'www' 'face') (TOK_FUNCTION array 'book' 'com' '1234'))) (TOK_SELEXPR (TOK_FUNCTION concat_ws '**' 'www' (TOK_FUNCTION array 'face') (TOK_FUNCTION array 'book' 'com' '1234'))) (TOK_SELEXPR (TOK_FUNCTION concat_ws '[]' (TOK_FUNCTION array 'www') 'face' (TOK_FUNCTION array 'book' 'com' '1234'))) (TOK_SELEXPR (TOK_FUNCTION concat_ws 'AAA' (TOK_FUNCTION array 'www') (TOK_FUNCTION array 'face' 'book' 'com') '1234'))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -123,23 +111,11 @@ STAGE PLANS: TableScan alias: dest1 Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: concat_ws('.', array('www','face','book','com'), '1234') - type: string - expr: concat_ws('-', 'www', array('face','book','com'), '1234') - type: string - expr: concat_ws('F', 'www', array('face','book','com','1234')) - type: string - expr: concat_ws('_', array('www','face'), array('book','com','1234')) - type: string - expr: concat_ws('**', 'www', array('face'), array('book','com','1234')) - type: string - expr: concat_ws('[]', array('www'), 'face', array('book','com','1234')) - type: string - expr: concat_ws('AAA', array('www'), array('face','book','com'), '1234') - type: string + expressions: concat_ws('.', array('www','face','book','com'), '1234') (type: string), concat_ws('-', 'www', array('face','book','com'), '1234') (type: string), concat_ws('F', 'www', array('face','book','com','1234')) (type: string), concat_ws('_', array('www','face'), array('book','com','1234')) (type: string), concat_ws('**', 'www', array('face'), array('book','com','1234')) (type: string), concat_ws('[]', array('www'), 'face', array('book','com','1234')) (type: string), concat_ws('AAA', array('www'), array('face','book','com'), '1234') (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 758 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: SELECT concat_ws('.', array('www', 'face', 'book', 'com'), '1234'), diff --git ql/src/test/results/clientpositive/udf_count.q.out ql/src/test/results/clientpositive/udf_count.q.out index a2ad438..fb45708 100644 --- ql/src/test/results/clientpositive/udf_count.q.out +++ ql/src/test/results/clientpositive/udf_count.q.out @@ -16,9 +16,6 @@ PREHOOK: query: EXPLAIN SELECT count(key) FROM src PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT count(key) FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -26,42 +23,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(key) - bucketGroup: false + aggregations: count(key) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -84,9 +75,6 @@ PREHOOK: query: EXPLAIN SELECT count(DISTINCT key) FROM src PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT count(DISTINCT key) FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL key)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -94,48 +82,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT key) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(DISTINCT key) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col1 - type: bigint + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0:0._col0) - bucketGroup: false + aggregations: count(DISTINCT KEY._col0:0._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -158,9 +136,6 @@ PREHOOK: query: EXPLAIN SELECT count(DISTINCT key, value) FROM src PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT count(DISTINCT key, value) FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -168,54 +143,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT key, value) - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + aggregations: count(DISTINCT key, value) + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col2 - type: bigint + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0:0._col0, KEY._col0:0._col1) - bucketGroup: false + aggregations: count(DISTINCT KEY._col0:0._col0, KEY._col0:0._col1) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -238,9 +197,6 @@ PREHOOK: query: EXPLAIN SELECT count(*) FROM src PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT count(*) FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -248,38 +204,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -302,9 +254,6 @@ PREHOOK: query: EXPLAIN SELECT count(1) FROM src PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT count(1) FROM src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -312,38 +261,34 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf_degrees.q.out ql/src/test/results/clientpositive/udf_degrees.q.out index 9894e3e..82c7705 100644 --- ql/src/test/results/clientpositive/udf_degrees.q.out +++ ql/src/test/results/clientpositive/udf_degrees.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select degrees(PI()) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION degrees (TOK_FUNCTION PI)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -18,11 +15,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: degrees(pi()) - type: double + expressions: degrees(pi()) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: select degrees(PI()) FROM src tablesample (1 rows) @@ -54,9 +51,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select degrees(PI()) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION degrees (TOK_FUNCTION PI)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -68,11 +62,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: degrees(pi()) - type: double + expressions: degrees(pi()) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: select degrees(PI()) FROM src tablesample (1 rows) diff --git ql/src/test/results/clientpositive/udf_elt.q.out ql/src/test/results/clientpositive/udf_elt.q.out index 8c5c49a..c963f9d 100644 --- ql/src/test/results/clientpositive/udf_elt.q.out +++ ql/src/test/results/clientpositive/udf_elt.q.out @@ -39,9 +39,6 @@ SELECT elt(2, 'abc', 'defg'), elt(3, 'abc', 'defg') FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION elt 2 'abc' 'defg')) (TOK_SELEXPR (TOK_FUNCTION elt 3 'aa' 'bb' 'cc' 'dd' 'ee' 'ff' 'gg')) (TOK_SELEXPR (TOK_FUNCTION elt '1' 'abc' 'defg')) (TOK_SELEXPR (TOK_FUNCTION elt 2 'aa' (TOK_FUNCTION TOK_TINYINT '2'))) (TOK_SELEXPR (TOK_FUNCTION elt 2 'aa' (TOK_FUNCTION TOK_SMALLINT '12345'))) (TOK_SELEXPR (TOK_FUNCTION elt 2 'aa' (TOK_FUNCTION TOK_BIGINT '123456789012'))) (TOK_SELEXPR (TOK_FUNCTION elt 2 'aa' (TOK_FUNCTION TOK_FLOAT 1.25))) (TOK_SELEXPR (TOK_FUNCTION elt 2 'aa' (TOK_FUNCTION TOK_DOUBLE 16.0))) (TOK_SELEXPR (TOK_FUNCTION elt TOK_NULL 'abc' 'defg')) (TOK_SELEXPR (TOK_FUNCTION elt 0 'abc' 'defg')) (TOK_SELEXPR (TOK_FUNCTION elt 3 'abc' 'defg'))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -53,31 +50,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: elt(2, 'abc', 'defg') - type: string - expr: elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg') - type: string - expr: elt('1', 'abc', 'defg') - type: string - expr: elt(2, 'aa', UDFToByte('2')) - type: string - expr: elt(2, 'aa', UDFToShort('12345')) - type: string - expr: elt(2, 'aa', UDFToLong('123456789012')) - type: string - expr: elt(2, 'aa', UDFToFloat(1.25)) - type: string - expr: elt(2, 'aa', 16.0) - type: string - expr: elt(null, 'abc', 'defg') - type: string - expr: elt(0, 'abc', 'defg') - type: string - expr: elt(3, 'abc', 'defg') - type: string + expressions: elt(2, 'abc', 'defg') (type: string), elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg') (type: string), elt('1', 'abc', 'defg') (type: string), elt(2, 'aa', UDFToByte('2')) (type: string), elt(2, 'aa', UDFToShort('12345')) (type: string), elt(2, 'aa', UDFToLong('123456789012')) (type: string), elt(2, 'aa', UDFToFloat(1.25)) (type: string), elt(2, 'aa', 16.0) (type: string), elt(null, 'abc', 'defg') (type: string), elt(0, 'abc', 'defg') (type: string), elt(3, 'abc', 'defg') (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT elt(2, 'abc', 'defg'), diff --git ql/src/test/results/clientpositive/udf_explode.q.out ql/src/test/results/clientpositive/udf_explode.q.out index 726b407..f7b6f2a 100644 --- ql/src/test/results/clientpositive/udf_explode.q.out +++ ql/src/test/results/clientpositive/udf_explode.q.out @@ -13,7 +13,30 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED SELECT explode(array(1,2,3)) AS myCol FROM src tablesample (1 rows) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_TABLESPLITSAMPLE + TOK_ROWCOUNT + 1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + explode + TOK_FUNCTION + array + 1 + 2 + 3 + myCol + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -22,32 +45,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src Row Limit Per Split: 1 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: array(1,2,3) - type: array + expressions: array(1,2,3) (type: array) outputColumnNames: _col0 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -120,7 +136,53 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src tablesample (1 rows)) a GROUP BY a.myCol POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) myCol)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) myCol)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_TABLESPLITSAMPLE + TOK_ROWCOUNT + 1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + explode + TOK_FUNCTION + array + 1 + 2 + 3 + myCol + a + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + myCol + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + . + TOK_TABLE_OR_COL + a + myCol + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -129,57 +191,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src Row Limit Per Split: 1 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: array(1,2,3) - type: array + expressions: array(1,2,3) (type: array) outputColumnNames: _col0 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Select Operator - expressions: - expr: col - type: int + expressions: col (type: int) outputColumnNames: col - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: col - type: int + aggregations: count(1) + keys: col (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE tag: -1 - value expressions: - expr: _col1 - type: bigint + value expressions: _col1 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -230,32 +271,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -314,7 +344,34 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src tablesample (1 rows) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION map 1 'one' 2 'two' 3 'three')) key val)))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_TABLESPLITSAMPLE + TOK_ROWCOUNT + 1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + explode + TOK_FUNCTION + map + 1 + 'one' + 2 + 'two' + 3 + 'three' + key + val + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -323,32 +380,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src Row Limit Per Split: 1 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: map(1:'one',2:'two',3:'three') - type: map + expressions: map(1:'one',2:'two',3:'three') (type: map) outputColumnNames: _col0 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -421,7 +471,66 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src tablesample (1 rows)) a GROUP BY a.key, a.val POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION map 1 'one' 2 'two' 3 'three')) key val)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL a) val)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_TABLESPLITSAMPLE + TOK_ROWCOUNT + 1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + explode + TOK_FUNCTION + map + 1 + 'one' + 2 + 'two' + 3 + 'three' + key + val + a + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + val + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + a + val + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -430,65 +539,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src Row Limit Per Split: 1 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: map(1:'one',2:'two',3:'three') - type: map + expressions: map(1:'one',2:'two',3:'three') (type: map) outputColumnNames: _col0 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Select Operator - expressions: - expr: key - type: int - expr: value - type: string + expressions: key (type: int), value (type: string) outputColumnNames: key, value - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: int - expr: value - type: string + aggregations: count(1) + keys: key (type: int), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: string - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE tag: -1 - value expressions: - expr: _col2 - type: bigint + value expressions: _col2 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -539,36 +619,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/udf_find_in_set.q.out ql/src/test/results/clientpositive/udf_find_in_set.q.out index f782114..3ada003 100644 --- ql/src/test/results/clientpositive/udf_find_in_set.q.out +++ ql/src/test/results/clientpositive/udf_find_in_set.q.out @@ -20,9 +20,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src1 SELECT find_in_set(src1.key,concat(src1.key,',',src1.value)) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION find_in_set (. (TOK_TABLE_OR_COL src1) key) (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL src1) key) ',' (. (TOK_TABLE_OR_COL src1) value))))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -33,11 +30,11 @@ STAGE PLANS: Processor Tree: TableScan alias: src1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: find_in_set(key, concat(key, ',', value)) - type: int + expressions: find_in_set(key, concat(key, ',', value)) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: FROM src1 SELECT find_in_set(src1.key,concat(src1.key,',',src1.value)) diff --git ql/src/test/results/clientpositive/udf_format_number.q.out ql/src/test/results/clientpositive/udf_format_number.q.out index 9200356..6771ae0 100644 --- ql/src/test/results/clientpositive/udf_format_number.q.out +++ ql/src/test/results/clientpositive/udf_format_number.q.out @@ -29,9 +29,6 @@ SELECT format_number(12332.123456, 4), format_number(12332.1,4), format_number(12332.2,0) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION format_number 12332.123456 4)) (TOK_SELEXPR (TOK_FUNCTION format_number 12332.1 4)) (TOK_SELEXPR (TOK_FUNCTION format_number 12332.2 0))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -43,15 +40,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: format_number(12332.123456, 4) - type: string - expr: format_number(12332.1, 4) - type: string - expr: format_number(12332.2, 0) - type: string + expressions: format_number(12332.123456, 4) (type: string), format_number(12332.1, 4) (type: string), format_number(12332.2, 0) (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT format_number(12332.123456, 4), diff --git ql/src/test/results/clientpositive/udf_get_json_object.q.out ql/src/test/results/clientpositive/udf_get_json_object.q.out index 28069e6..d9764f5 100644 --- ql/src/test/results/clientpositive/udf_get_json_object.q.out +++ ql/src/test/results/clientpositive/udf_get_json_object.q.out @@ -44,9 +44,6 @@ POSTHOOK: query: EXPLAIN #### A masked pattern was here #### POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.c1 SIMPLE [] -ABSTRACT SYNTAX TREE: -#### A masked pattern was here #### - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -57,11 +54,11 @@ STAGE PLANS: Processor Tree: TableScan alias: src_json + Statistics: Num rows: 6 Data size: 645 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: #### A masked pattern was here #### - type: string outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 645 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT get_json_object(src_json.json, '$') FROM src_json diff --git ql/src/test/results/clientpositive/udf_hash.q.out ql/src/test/results/clientpositive/udf_hash.q.out index b504f2a..c2591d7 100644 --- ql/src/test/results/clientpositive/udf_hash.q.out +++ ql/src/test/results/clientpositive/udf_hash.q.out @@ -24,9 +24,6 @@ SELECT hash(CAST(1 AS TINYINT)), hash(CAST(2 AS SMALLINT)), hash(1, 2, 3) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION hash (TOK_FUNCTION TOK_TINYINT 1))) (TOK_SELEXPR (TOK_FUNCTION hash (TOK_FUNCTION TOK_SMALLINT 2))) (TOK_SELEXPR (TOK_FUNCTION hash 3)) (TOK_SELEXPR (TOK_FUNCTION hash (TOK_FUNCTION TOK_BIGINT '123456789012'))) (TOK_SELEXPR (TOK_FUNCTION hash (TOK_FUNCTION TOK_FLOAT 1.25))) (TOK_SELEXPR (TOK_FUNCTION hash (TOK_FUNCTION TOK_DOUBLE 16.0))) (TOK_SELEXPR (TOK_FUNCTION hash '400')) (TOK_SELEXPR (TOK_FUNCTION hash 'abc')) (TOK_SELEXPR (TOK_FUNCTION hash TRUE)) (TOK_SELEXPR (TOK_FUNCTION hash FALSE)) (TOK_SELEXPR (TOK_FUNCTION hash 1 2 3))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -38,31 +35,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: hash(UDFToByte(1)) - type: int - expr: hash(UDFToShort(2)) - type: int - expr: hash(3) - type: int - expr: hash(UDFToLong('123456789012')) - type: int - expr: hash(UDFToFloat(1.25)) - type: int - expr: hash(16.0) - type: int - expr: hash('400') - type: int - expr: hash('abc') - type: int - expr: hash(true) - type: int - expr: hash(false) - type: int - expr: hash(1,2,3) - type: int + expressions: hash(UDFToByte(1)) (type: int), hash(UDFToShort(2)) (type: int), hash(3) (type: int), hash(UDFToLong('123456789012')) (type: int), hash(UDFToFloat(1.25)) (type: int), hash(16.0) (type: int), hash('400') (type: int), hash('abc') (type: int), hash(true) (type: int), hash(false) (type: int), hash(1,2,3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT hash(CAST(1 AS TINYINT)), hash(CAST(2 AS SMALLINT)), diff --git ql/src/test/results/clientpositive/udf_hour.q.out ql/src/test/results/clientpositive/udf_hour.q.out index 36c6a5c..156fb5b 100644 --- ql/src/test/results/clientpositive/udf_hour.q.out +++ ql/src/test/results/clientpositive/udf_hour.q.out @@ -22,9 +22,6 @@ POSTHOOK: query: EXPLAIN SELECT hour('2009-08-07 13:14:15'), hour('13:14:15'), hour('2009-08-07') FROM src WHERE key = 86 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION hour '2009-08-07 13:14:15')) (TOK_SELEXPR (TOK_FUNCTION hour '13:14:15')) (TOK_SELEXPR (TOK_FUNCTION hour '2009-08-07'))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -35,19 +32,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: hour('2009-08-07 13:14:15') - type: int - expr: hour('13:14:15') - type: int - expr: hour('2009-08-07') - type: int + expressions: hour('2009-08-07 13:14:15') (type: int), hour('13:14:15') (type: int), hour('2009-08-07') (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT hour('2009-08-07 13:14:15'), hour('13:14:15'), hour('2009-08-07') diff --git ql/src/test/results/clientpositive/udf_if.q.out ql/src/test/results/clientpositive/udf_if.q.out index 26e3b2d..6a9458d 100644 --- ql/src/test/results/clientpositive/udf_if.q.out +++ ql/src/test/results/clientpositive/udf_if.q.out @@ -26,9 +26,6 @@ SELECT IF(TRUE, 1, 2) AS COL1, IF(IF(TRUE, NULL, FALSE), 1, 2) AS COL6 FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION IF TRUE 1 2) COL1) (TOK_SELEXPR (TOK_FUNCTION IF FALSE (TOK_FUNCTION TOK_STRING TOK_NULL) (TOK_FUNCTION TOK_STRING 1)) COL2) (TOK_SELEXPR (TOK_FUNCTION IF (= 1 1) (TOK_FUNCTION IF (= 2 2) 1 2) (TOK_FUNCTION IF (= 3 3) 3 4)) COL3) (TOK_SELEXPR (TOK_FUNCTION IF (= 2 2) 1 TOK_NULL) COL4) (TOK_SELEXPR (TOK_FUNCTION IF (= 2 2) TOK_NULL 1) COL5) (TOK_SELEXPR (TOK_FUNCTION IF (TOK_FUNCTION IF TRUE TOK_NULL FALSE) 1 2) COL6)))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -40,21 +37,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: if(true, 1, 2) - type: int - expr: if(false, UDFToString(null), UDFToString(1)) - type: string - expr: if((1 = 1), if((2 = 2), 1, 2), if((3 = 3), 3, 4)) - type: int - expr: if((2 = 2), 1, null) - type: int - expr: if((2 = 2), null, 1) - type: int - expr: if(if(true, null, false), 1, 2) - type: int + expressions: if(true, 1, 2) (type: int), if(false, UDFToString(null), UDFToString(1)) (type: string), if((1 = 1), if((2 = 2), 1, 2), if((3 = 3), 3, 4)) (type: int), if((2 = 2), 1, null) (type: int), if((2 = 2), null, 1) (type: int), if(if(true, null, false), 1, 2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT IF(TRUE, 1, 2) AS COL1, @@ -94,9 +81,6 @@ SELECT IF(TRUE, CAST(128 AS SMALLINT), CAST(1 AS TINYINT)) AS COL1, IF(FALSE, 'ABC', 12.3) AS COL4 FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION IF TRUE (TOK_FUNCTION TOK_SMALLINT 128) (TOK_FUNCTION TOK_TINYINT 1)) COL1) (TOK_SELEXPR (TOK_FUNCTION IF FALSE 1 1.1) COL2) (TOK_SELEXPR (TOK_FUNCTION IF FALSE 1 'ABC') COL3) (TOK_SELEXPR (TOK_FUNCTION IF FALSE 'ABC' 12.3) COL4)))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -108,17 +92,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: if(true, UDFToShort(128), UDFToByte(1)) - type: smallint - expr: if(false, 1, 1.1) - type: double - expr: if(false, 1, 'ABC') - type: string - expr: if(false, 'ABC', 12.3) - type: string + expressions: if(true, UDFToShort(128), UDFToByte(1)) (type: smallint), if(false, 1, 1.1) (type: double), if(false, 1, 'ABC') (type: string), if(false, 'ABC', 12.3) (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT IF(TRUE, CAST(128 AS SMALLINT), CAST(1 AS TINYINT)) AS COL1, diff --git ql/src/test/results/clientpositive/udf_in_file.q.out ql/src/test/results/clientpositive/udf_in_file.q.out index 414a49d..3285b69 100644 --- ql/src/test/results/clientpositive/udf_in_file.q.out +++ ql/src/test/results/clientpositive/udf_in_file.q.out @@ -15,9 +15,6 @@ SELECT in_file("303", "../../data/files/test2.dat"), in_file(CAST(NULL AS STRING), "../../data/files/test2.dat") FROM src LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION in_file "303" "../../data/files/test2.dat")) (TOK_SELEXPR (TOK_FUNCTION in_file "304" "../../data/files/test2.dat")) (TOK_SELEXPR (TOK_FUNCTION in_file (TOK_FUNCTION TOK_STRING TOK_NULL) "../../data/files/test2.dat"))) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -25,23 +22,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: in_file('303', '../../data/files/test2.dat') - type: boolean - expr: in_file('304', '../../data/files/test2.dat') - type: boolean - expr: in_file(UDFToString(null), '../../data/files/test2.dat') - type: boolean + expressions: in_file('303', '../../data/files/test2.dat') (type: boolean), in_file('304', '../../data/files/test2.dat') (type: boolean), in_file(UDFToString(null), '../../data/files/test2.dat') (type: boolean) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf_inline.q.out ql/src/test/results/clientpositive/udf_inline.q.out index 227b2e5..609ed8e 100644 --- ql/src/test/results/clientpositive/udf_inline.q.out +++ ql/src/test/results/clientpositive/udf_inline.q.out @@ -19,9 +19,6 @@ POSTHOOK: query: explain SELECT inline( ) ) as (id, text) FROM SRC limit 2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION inline (TOK_FUNCTION ARRAY (TOK_FUNCTION STRUCT 1 'dude!') (TOK_FUNCTION STRUCT 2 'Wheres') (TOK_FUNCTION STRUCT 3 'my car?'))) id text)) (TOK_LIMIT 2))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -29,21 +26,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: array(struct(1,'dude!'),struct(2,'Wheres'),struct(3,'my car?')) - type: array> + expressions: array(struct(1,'dude!'),struct(2,'Wheres'),struct(3,'my car?')) (type: array>) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: inline Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf_instr.q.out ql/src/test/results/clientpositive/udf_instr.q.out index c314b29..4533674 100644 --- ql/src/test/results/clientpositive/udf_instr.q.out +++ ql/src/test/results/clientpositive/udf_instr.q.out @@ -43,9 +43,6 @@ SELECT instr('abcd', 'abc'), instr('abcd', null) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION instr 'abcd' 'abc')) (TOK_SELEXPR (TOK_FUNCTION instr 'abcabc' 'ccc')) (TOK_SELEXPR (TOK_FUNCTION instr 123 '23')) (TOK_SELEXPR (TOK_FUNCTION instr 123 23)) (TOK_SELEXPR (TOK_FUNCTION instr TRUE 1)) (TOK_SELEXPR (TOK_FUNCTION instr FALSE 1)) (TOK_SELEXPR (TOK_FUNCTION instr '12345' (TOK_FUNCTION TOK_TINYINT '2'))) (TOK_SELEXPR (TOK_FUNCTION instr (TOK_FUNCTION TOK_SMALLINT '12345') '34')) (TOK_SELEXPR (TOK_FUNCTION instr (TOK_FUNCTION TOK_BIGINT '123456789012') '456')) (TOK_SELEXPR (TOK_FUNCTION instr (TOK_FUNCTION TOK_FLOAT 1.25) '.25')) (TOK_SELEXPR (TOK_FUNCTION instr (TOK_FUNCTION TOK_DOUBLE 16.0) '.0')) (TOK_SELEXPR (TOK_FUNCTION instr TOK_NULL 'abc')) (TOK_SELEXPR (TOK_FUNCTION instr 'abcd' TOK_NULL))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -57,35 +54,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: instr('abcd''abc') - type: int - expr: instr('abcabc''ccc') - type: int - expr: instr(123'23') - type: int - expr: instr(12323) - type: int - expr: instr(true1) - type: int - expr: instr(false1) - type: int - expr: instr('12345'UDFToByte('2')) - type: int - expr: instr(UDFToShort('12345')'34') - type: int - expr: instr(UDFToLong('123456789012')'456') - type: int - expr: instr(UDFToFloat(1.25)'.25') - type: int - expr: instr(16.0'.0') - type: int - expr: instr(null'abc') - type: int - expr: instr('abcd'null) - type: int + expressions: instr('abcd''abc') (type: int), instr('abcabc''ccc') (type: int), instr(123'23') (type: int), instr(12323) (type: int), instr(true1) (type: int), instr(false1) (type: int), instr('12345'UDFToByte('2')) (type: int), instr(UDFToShort('12345')'34') (type: int), instr(UDFToLong('123456789012')'456') (type: int), instr(UDFToFloat(1.25)'.25') (type: int), instr(16.0'.0') (type: int), instr(null'abc') (type: int), instr('abcd'null) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT instr('abcd', 'abc'), diff --git ql/src/test/results/clientpositive/udf_isnull_isnotnull.q.out ql/src/test/results/clientpositive/udf_isnull_isnotnull.q.out index a61f5df..c6188d3 100644 --- ql/src/test/results/clientpositive/udf_isnull_isnotnull.q.out +++ ql/src/test/results/clientpositive/udf_isnull_isnotnull.q.out @@ -32,9 +32,6 @@ SELECT NULL IS NULL, FROM src WHERE true IS NOT NULL LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_ISNULL TOK_NULL)) (TOK_SELEXPR (TOK_FUNCTION TOK_ISNOTNULL 1)) (TOK_SELEXPR (TOK_FUNCTION TOK_ISNOTNULL 'my string'))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL true)) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -45,20 +42,17 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Filter Operator - predicate: - expr: true is not null - type: boolean + predicate: true is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator - expressions: - expr: null is null - type: boolean - expr: 1 is not null - type: boolean - expr: 'my string' is not null - type: boolean + expressions: null is null (type: boolean), 1 is not null (type: boolean), 'my string' is not null (type: boolean) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE ListSink PREHOOK: query: SELECT NULL IS NULL, @@ -94,9 +88,6 @@ SELECT src_thrift.lint IS NOT NULL, WHERE src_thrift.lint IS NOT NULL AND NOT (src_thrift.mstringstring IS NULL) LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL src_thrift) lint))) (TOK_SELEXPR (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL src_thrift) lintstring))) (TOK_SELEXPR (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL src_thrift) mstringstring)))) (TOK_WHERE (AND (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL src_thrift) lint)) (NOT (TOK_FUNCTION TOK_ISNULL (. (TOK_TABLE_OR_COL src_thrift) mstringstring))))) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -107,20 +98,17 @@ STAGE PLANS: Processor Tree: TableScan alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (lint is not null and (not mstringstring is null)) - type: boolean + predicate: (lint is not null and (not mstringstring is null)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: lint is not null - type: boolean - expr: lintstring is not null - type: boolean - expr: mstringstring is not null - type: boolean + expressions: lint is not null (type: boolean), lintstring is not null (type: boolean), mstringstring is not null (type: boolean) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE ListSink PREHOOK: query: FROM src_thrift diff --git ql/src/test/results/clientpositive/udf_java_method.q.out ql/src/test/results/clientpositive/udf_java_method.q.out index e02f925..97efa6e 100644 --- ql/src/test/results/clientpositive/udf_java_method.q.out +++ ql/src/test/results/clientpositive/udf_java_method.q.out @@ -36,7 +36,64 @@ SELECT java_method("java.lang.String", "valueOf", 1), FROM src tablesample (1 rows) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION java_method "java.lang.String" "valueOf" 1)) (TOK_SELEXPR (TOK_FUNCTION java_method "java.lang.String" "isEmpty")) (TOK_SELEXPR (TOK_FUNCTION java_method "java.lang.Math" "max" 2 3)) (TOK_SELEXPR (TOK_FUNCTION java_method "java.lang.Math" "min" 2 3)) (TOK_SELEXPR (TOK_FUNCTION java_method "java.lang.Math" "round" 2.5)) (TOK_SELEXPR (TOK_FUNCTION java_method "java.lang.Math" "exp" 1.0)) (TOK_SELEXPR (TOK_FUNCTION java_method "java.lang.Math" "floor" 1.9))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_TABLESPLITSAMPLE + TOK_ROWCOUNT + 1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + java_method + "java.lang.String" + "valueOf" + 1 + TOK_SELEXPR + TOK_FUNCTION + java_method + "java.lang.String" + "isEmpty" + TOK_SELEXPR + TOK_FUNCTION + java_method + "java.lang.Math" + "max" + 2 + 3 + TOK_SELEXPR + TOK_FUNCTION + java_method + "java.lang.Math" + "min" + 2 + 3 + TOK_SELEXPR + TOK_FUNCTION + java_method + "java.lang.Math" + "round" + 2.5 + TOK_SELEXPR + TOK_FUNCTION + java_method + "java.lang.Math" + "exp" + 1.0 + TOK_SELEXPR + TOK_FUNCTION + java_method + "java.lang.Math" + "floor" + 1.9 + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -49,28 +106,12 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: reflect('java.lang.String','valueOf',1) - type: string - expr: reflect('java.lang.String','isEmpty') - type: string - expr: reflect('java.lang.Math','max',2,3) - type: string - expr: reflect('java.lang.Math','min',2,3) - type: string - expr: reflect('java.lang.Math','round',2.5) - type: string - expr: reflect('java.lang.Math','exp',1.0) - type: string - expr: reflect('java.lang.Math','floor',1.9) - type: string + expressions: reflect('java.lang.String','valueOf',1) (type: string), reflect('java.lang.String','isEmpty') (type: string), reflect('java.lang.Math','max',2,3) (type: string), reflect('java.lang.Math','min',2,3) (type: string), reflect('java.lang.Math','round',2.5) (type: string), reflect('java.lang.Math','exp',1.0) (type: string), reflect('java.lang.Math','floor',1.9) (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT java_method("java.lang.String", "valueOf", 1), diff --git ql/src/test/results/clientpositive/udf_length.q.out ql/src/test/results/clientpositive/udf_length.q.out index 6894a7b..5a695c2 100644 --- ql/src/test/results/clientpositive/udf_length.q.out +++ ql/src/test/results/clientpositive/udf_length.q.out @@ -20,9 +20,6 @@ PREHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT length(src PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT length(src1.value) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION length (. (TOK_TABLE_OR_COL src1) value)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -36,18 +33,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1 + Map Operator Tree: TableScan alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: length(value) - type: int + expressions: length(value) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -78,12 +74,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -92,12 +86,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -182,9 +174,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT length(dest1.name) FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.len EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION length (. (TOK_TABLE_OR_COL dest1) name)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -195,11 +184,11 @@ STAGE PLANS: Processor Tree: TableScan alias: dest1 + Statistics: Num rows: 0 Data size: 6 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: length(name) - type: int + expressions: length(name) (type: int) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 6 Basic stats: PARTIAL Column stats: NONE ListSink PREHOOK: query: SELECT length(dest1.name) FROM dest1 diff --git ql/src/test/results/clientpositive/udf_like.q.out ql/src/test/results/clientpositive/udf_like.q.out index 9813f72..13c9876 100644 --- ql/src/test/results/clientpositive/udf_like.q.out +++ ql/src/test/results/clientpositive/udf_like.q.out @@ -23,9 +23,6 @@ SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_ '%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a','ab' LIKE '','' LIKE '' FROM src WHERE src.key = 86 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (LIKE '_%_' '%\_\%\_%')) (TOK_SELEXPR (LIKE '__' '%\_\%\_%')) (TOK_SELEXPR (LIKE '%%_%_' '%\_\%\_%')) (TOK_SELEXPR (LIKE '%_%_%' '%\%\_\%')) (TOK_SELEXPR (LIKE '_%_' '\%\_%')) (TOK_SELEXPR (LIKE '%__' '__\%%')) (TOK_SELEXPR (LIKE '_%' '\_\%\_\%%')) (TOK_SELEXPR (LIKE '_%' '\_\%_%')) (TOK_SELEXPR (LIKE '%_' '\%\_')) (TOK_SELEXPR (LIKE 'ab' '\%\_')) (TOK_SELEXPR (LIKE 'ab' '_a%')) (TOK_SELEXPR (LIKE 'ab' 'a')) (TOK_SELEXPR (LIKE 'ab' '')) (TOK_SELEXPR (LIKE '' ''))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) key) 86)))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -36,41 +33,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: ('_%_' like '%\_\%\_%') - type: boolean - expr: ('__' like '%\_\%\_%') - type: boolean - expr: ('%%_%_' like '%\_\%\_%') - type: boolean - expr: ('%_%_%' like '%\%\_\%') - type: boolean - expr: ('_%_' like '\%\_%') - type: boolean - expr: ('%__' like '__\%%') - type: boolean - expr: ('_%' like '\_\%\_\%%') - type: boolean - expr: ('_%' like '\_\%_%') - type: boolean - expr: ('%_' like '\%\_') - type: boolean - expr: ('ab' like '\%\_') - type: boolean - expr: ('ab' like '_a%') - type: boolean - expr: ('ab' like 'a') - type: boolean - expr: ('ab' like '') - type: boolean - expr: ('' like '') - type: boolean + expressions: ('_%_' like '%\_\%\_%') (type: boolean), ('__' like '%\_\%\_%') (type: boolean), ('%%_%_' like '%\_\%\_%') (type: boolean), ('%_%_%' like '%\%\_\%') (type: boolean), ('_%_' like '\%\_%') (type: boolean), ('%__' like '__\%%') (type: boolean), ('_%' like '\_\%\_\%%') (type: boolean), ('_%' like '\_\%_%') (type: boolean), ('%_' like '\%\_') (type: boolean), ('ab' like '\%\_') (type: boolean), ('ab' like '_a%') (type: boolean), ('ab' like 'a') (type: boolean), ('ab' like '') (type: boolean), ('' like '') (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE '%\%\_\%', diff --git ql/src/test/results/clientpositive/udf_locate.q.out ql/src/test/results/clientpositive/udf_locate.q.out index 5b0c0fe..736ff5c 100644 --- ql/src/test/results/clientpositive/udf_locate.q.out +++ ql/src/test/results/clientpositive/udf_locate.q.out @@ -51,9 +51,6 @@ SELECT locate('abc', 'abcd'), locate('abc', 'abcd', 'invalid number') FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION locate 'abc' 'abcd')) (TOK_SELEXPR (TOK_FUNCTION locate 'ccc' 'abcabc')) (TOK_SELEXPR (TOK_FUNCTION locate '23' 123)) (TOK_SELEXPR (TOK_FUNCTION locate 23 123)) (TOK_SELEXPR (TOK_FUNCTION locate 'abc' 'abcabc' 2)) (TOK_SELEXPR (TOK_FUNCTION locate 'abc' 'abcabc' '2')) (TOK_SELEXPR (TOK_FUNCTION locate 1 TRUE)) (TOK_SELEXPR (TOK_FUNCTION locate 1 FALSE)) (TOK_SELEXPR (TOK_FUNCTION locate (TOK_FUNCTION TOK_TINYINT '2') '12345')) (TOK_SELEXPR (TOK_FUNCTION locate '34' (TOK_FUNCTION TOK_SMALLINT '12345'))) (TOK_SELEXPR (TOK_FUNCTION locate '456' (TOK_FUNCTION TOK_BIGINT '123456789012'))) (TOK_SELEXPR (TOK_FUNCTION locate '.25' (TOK_FUNCTION TOK_FLOAT 1.25))) (TOK_SELEXPR (TOK_FUNCTION locate '.0' (TOK_FUNCTION TOK_DOUBLE 16.0))) (TOK_SELEXPR (TOK_FUNCTION locate TOK_NULL 'abc')) (TOK_SELEXPR (TOK_FUNCTION locate 'abc' TOK_NULL)) (TOK_SELEXPR (TOK_FUNCTION locate 'abc' 'abcd' TOK_NULL)) (TOK_SELEXPR (TOK_FUNCTION locate 'abc' 'abcd' 'invalid number'))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -65,43 +62,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: locate('abc''abcd') - type: int - expr: locate('ccc''abcabc') - type: int - expr: locate('23'123) - type: int - expr: locate(23123) - type: int - expr: locate('abc''abcabc'2) - type: int - expr: locate('abc''abcabc''2') - type: int - expr: locate(1true) - type: int - expr: locate(1false) - type: int - expr: locate(UDFToByte('2')'12345') - type: int - expr: locate('34'UDFToShort('12345')) - type: int - expr: locate('456'UDFToLong('123456789012')) - type: int - expr: locate('.25'UDFToFloat(1.25)) - type: int - expr: locate('.0'16.0) - type: int - expr: locate(null'abc') - type: int - expr: locate('abc'null) - type: int - expr: locate('abc''abcd'null) - type: int - expr: locate('abc''abcd''invalid number') - type: int + expressions: locate('abc''abcd') (type: int), locate('ccc''abcabc') (type: int), locate('23'123) (type: int), locate(23123) (type: int), locate('abc''abcabc'2) (type: int), locate('abc''abcabc''2') (type: int), locate(1true) (type: int), locate(1false) (type: int), locate(UDFToByte('2')'12345') (type: int), locate('34'UDFToShort('12345')) (type: int), locate('456'UDFToLong('123456789012')) (type: int), locate('.25'UDFToFloat(1.25)) (type: int), locate('.0'16.0) (type: int), locate(null'abc') (type: int), locate('abc'null) (type: int), locate('abc''abcd'null) (type: int), locate('abc''abcd''invalid number') (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT locate('abc', 'abcd'), diff --git ql/src/test/results/clientpositive/udf_logic_java_boolean.q.out ql/src/test/results/clientpositive/udf_logic_java_boolean.q.out index 9743bb8..88c1984 100644 --- ql/src/test/results/clientpositive/udf_logic_java_boolean.q.out +++ ql/src/test/results/clientpositive/udf_logic_java_boolean.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATEFUNCTION POSTHOOK: query: EXPLAIN CREATE TEMPORARY FUNCTION test_udf_get_java_boolean AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFTestGetJavaBoolean' POSTHOOK: type: CREATEFUNCTION -ABSTRACT SYNTAX TREE: - (TOK_CREATEFUNCTION test_udf_get_java_boolean 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFTestGetJavaBoolean') - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/udf_lower.q.out ql/src/test/results/clientpositive/udf_lower.q.out index 6474be8..58a5a16 100644 --- ql/src/test/results/clientpositive/udf_lower.q.out +++ ql/src/test/results/clientpositive/udf_lower.q.out @@ -18,9 +18,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT lower('AbC 123'), upper('AbC 123') FROM src WHERE key = 86 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION lower 'AbC 123')) (TOK_SELEXPR (TOK_FUNCTION upper 'AbC 123'))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -28,24 +25,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: lower('AbC 123') - type: string - expr: upper('AbC 123') - type: string + expressions: lower('AbC 123') (type: string), upper('AbC 123') (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf_lpad.q.out ql/src/test/results/clientpositive/udf_lpad.q.out index 2934ba0..148db4d 100644 --- ql/src/test/results/clientpositive/udf_lpad.q.out +++ ql/src/test/results/clientpositive/udf_lpad.q.out @@ -25,9 +25,6 @@ POSTHOOK: query: EXPLAIN SELECT lpad('hi', 6, '123') FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION lpad 'hi' 1 '?')) (TOK_SELEXPR (TOK_FUNCTION lpad 'hi' 5 '.')) (TOK_SELEXPR (TOK_FUNCTION lpad 'hi' 6 '123'))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -39,15 +36,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: lpad('hi', 1, '?') - type: string - expr: lpad('hi', 5, '.') - type: string - expr: lpad('hi', 6, '123') - type: string + expressions: lpad('hi', 1, '?') (type: string), lpad('hi', 5, '.') (type: string), lpad('hi', 6, '123') (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT diff --git ql/src/test/results/clientpositive/udf_map.q.out ql/src/test/results/clientpositive/udf_map.q.out index c8eee9f..597de2f 100644 --- ql/src/test/results/clientpositive/udf_map.q.out +++ ql/src/test/results/clientpositive/udf_map.q.out @@ -14,9 +14,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT map(), map(1, "a", 2, "b", 3, "c"), map(1, 2, "a", "b"), map(1, "a", 2, "b", 3, "c")[2], map(1, 2, "a", "b")["a"], map(1, array("a"))[1][0] FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION map)) (TOK_SELEXPR (TOK_FUNCTION map 1 "a" 2 "b" 3 "c")) (TOK_SELEXPR (TOK_FUNCTION map 1 2 "a" "b")) (TOK_SELEXPR ([ (TOK_FUNCTION map 1 "a" 2 "b" 3 "c") 2)) (TOK_SELEXPR ([ (TOK_FUNCTION map 1 2 "a" "b") "a")) (TOK_SELEXPR ([ ([ (TOK_FUNCTION map 1 (TOK_FUNCTION array "a")) 1) 0))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -28,21 +25,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: map() - type: map - expr: map(1:'a',2:'b',3:'c') - type: map - expr: map(1:2,'a':'b') - type: map - expr: map(1:'a',2:'b',3:'c')[2] - type: string - expr: map(1:2,'a':'b')['a'] - type: string - expr: map(1:array('a'))[1][0] - type: string + expressions: map() (type: map), map(1:'a',2:'b',3:'c') (type: map), map(1:2,'a':'b') (type: map), map(1:'a',2:'b',3:'c')[2] (type: string), map(1:2,'a':'b')['a'] (type: string), map(1:array('a'))[1][0] (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT map(), map(1, "a", 2, "b", 3, "c"), map(1, 2, "a", "b"), diff --git ql/src/test/results/clientpositive/udf_minute.q.out ql/src/test/results/clientpositive/udf_minute.q.out index 352ecbd..3815e6b 100644 --- ql/src/test/results/clientpositive/udf_minute.q.out +++ ql/src/test/results/clientpositive/udf_minute.q.out @@ -22,9 +22,6 @@ POSTHOOK: query: EXPLAIN SELECT minute('2009-08-07 13:14:15'), minute('13:14:15'), minute('2009-08-07') FROM src WHERE key = 86 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION minute '2009-08-07 13:14:15')) (TOK_SELEXPR (TOK_FUNCTION minute '13:14:15')) (TOK_SELEXPR (TOK_FUNCTION minute '2009-08-07'))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -32,26 +29,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: minute('2009-08-07 13:14:15') - type: int - expr: minute('13:14:15') - type: int - expr: minute('2009-08-07') - type: int + expressions: minute('2009-08-07 13:14:15') (type: int), minute('13:14:15') (type: int), minute('2009-08-07') (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf_named_struct.q.out ql/src/test/results/clientpositive/udf_named_struct.q.out index 71ee458..cab557d 100644 --- ql/src/test/results/clientpositive/udf_named_struct.q.out +++ ql/src/test/results/clientpositive/udf_named_struct.q.out @@ -16,9 +16,6 @@ POSTHOOK: query: EXPLAIN SELECT named_struct("foo", 1, "bar", 2), named_struct("foo", 1, "bar", 2).foo FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION named_struct "foo" 1 "bar" 2)) (TOK_SELEXPR (. (TOK_FUNCTION named_struct "foo" 1 "bar" 2) foo))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -30,13 +27,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: named_struct('foo',1,'bar',2) - type: struct - expr: named_struct('foo',1,'bar',2).foo - type: int + expressions: named_struct('foo',1,'bar',2) (type: struct), named_struct('foo',1,'bar',2).foo (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT named_struct("foo", 1, "bar", 2), diff --git ql/src/test/results/clientpositive/udf_notequal.q.out ql/src/test/results/clientpositive/udf_notequal.q.out index cd96e4f..8ea8a03 100644 --- ql/src/test/results/clientpositive/udf_notequal.q.out +++ ql/src/test/results/clientpositive/udf_notequal.q.out @@ -30,9 +30,6 @@ SELECT key, value FROM src WHERE key <> '302' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (<> (TOK_TABLE_OR_COL key) '302')))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -43,17 +40,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key <> '302') - type: boolean + predicate: (key <> '302') (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key, value @@ -577,9 +571,6 @@ SELECT key, value FROM src WHERE key != '302' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (!= (TOK_TABLE_OR_COL key) '302')))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -590,17 +581,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key <> '302') - type: boolean + predicate: (key <> '302') (type: boolean) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key, value diff --git ql/src/test/results/clientpositive/udf_nvl.q.out ql/src/test/results/clientpositive/udf_nvl.q.out index 96caa20..2565779 100644 --- ql/src/test/results/clientpositive/udf_nvl.q.out +++ ql/src/test/results/clientpositive/udf_nvl.q.out @@ -21,9 +21,6 @@ SELECT NVL( 1 , 2 ) AS COL1, NVL( NULL, 5 ) AS COL2 FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION NVL 1 2) COL1) (TOK_SELEXPR (TOK_FUNCTION NVL TOK_NULL 5) COL2)))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -35,13 +32,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: if 1 is null returns2 - type: int - expr: if null is null returns5 - type: int + expressions: if 1 is null returns2 (type: int), if null is null returns5 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT NVL( 1 , 2 ) AS COL1, diff --git ql/src/test/results/clientpositive/udf_parse_url.q.out ql/src/test/results/clientpositive/udf_parse_url.q.out index c2fa1b7..0e94023 100644 --- ql/src/test/results/clientpositive/udf_parse_url.q.out +++ ql/src/test/results/clientpositive/udf_parse_url.q.out @@ -45,9 +45,6 @@ parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'USERINFO') , parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'AUTHORITY') FROM src WHERE key = 86 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION parse_url 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' 'HOST')) (TOK_SELEXPR (TOK_FUNCTION parse_url 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' 'PATH')) (TOK_SELEXPR (TOK_FUNCTION parse_url 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' 'QUERY')) (TOK_SELEXPR (TOK_FUNCTION parse_url 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' 'REF')) (TOK_SELEXPR (TOK_FUNCTION parse_url 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' 'QUERY' 'k2')) (TOK_SELEXPR (TOK_FUNCTION parse_url 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' 'QUERY' 'k1')) (TOK_SELEXPR (TOK_FUNCTION parse_url 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' 'QUERY' 'k3')) (TOK_SELEXPR (TOK_FUNCTION parse_url 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' 'FILE')) (TOK_SELEXPR (TOK_FUNCTION parse_url 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' 'PROTOCOL')) (TOK_SELEXPR (TOK_FUNCTION parse_url 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' 'USERINFO')) (TOK_SELEXPR (TOK_FUNCTION parse_url 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' 'AUTHORITY'))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -55,42 +52,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST') - type: string - expr: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'PATH') - type: string - expr: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY') - type: string - expr: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'REF') - type: string - expr: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY', 'k2') - type: string - expr: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY', 'k1') - type: string - expr: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY', 'k3') - type: string - expr: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'FILE') - type: string - expr: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'PROTOCOL') - type: string - expr: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'USERINFO') - type: string - expr: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'AUTHORITY') - type: string + expressions: parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST') (type: string), parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'PATH') (type: string), parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY') (type: string), parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'REF') (type: string), parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY', 'k2') (type: string), parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY', 'k1') (type: string), parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY', 'k3') (type: string), parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'FILE') (type: string), parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'PROTOCOL') (type: string), parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'USERINFO') (type: string), parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'AUTHORITY') (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf_printf.q.out ql/src/test/results/clientpositive/udf_printf.q.out index e9d05eb..38e8868 100644 --- ql/src/test/results/clientpositive/udf_printf.q.out +++ ql/src/test/results/clientpositive/udf_printf.q.out @@ -25,9 +25,6 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT printf("Hello World %d %s", 100, "days") FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION printf "Hello World %d %s" 100 "days"))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -39,11 +36,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: printf('Hello World %d %s', 100, 'days') - type: string + expressions: printf('Hello World %d %s', 100, 'days') (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: -- Test Primitive Types diff --git ql/src/test/results/clientpositive/udf_radians.q.out ql/src/test/results/clientpositive/udf_radians.q.out index 9453bbf..0729b1b 100644 --- ql/src/test/results/clientpositive/udf_radians.q.out +++ ql/src/test/results/clientpositive/udf_radians.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select radians(57.2958) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION radians 57.2958))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -18,11 +15,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: radians(57.2958) - type: double + expressions: radians(57.2958) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: select radians(57.2958) FROM src tablesample (1 rows) @@ -63,9 +60,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select radians(57.2958) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION radians 57.2958))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -77,11 +71,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: radians(57.2958) - type: double + expressions: radians(57.2958) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: select radians(57.2958) FROM src tablesample (1 rows) diff --git ql/src/test/results/clientpositive/udf_reflect.q.out ql/src/test/results/clientpositive/udf_reflect.q.out index 1f5ce43..44e10ec 100644 --- ql/src/test/results/clientpositive/udf_reflect.q.out +++ ql/src/test/results/clientpositive/udf_reflect.q.out @@ -34,7 +34,72 @@ SELECT reflect("java.lang.String", "valueOf", 1), FROM src tablesample (1 rows) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION reflect "java.lang.String" "valueOf" 1)) (TOK_SELEXPR (TOK_FUNCTION reflect "java.lang.String" "isEmpty")) (TOK_SELEXPR (TOK_FUNCTION reflect "java.lang.Math" "max" 2 3)) (TOK_SELEXPR (TOK_FUNCTION reflect "java.lang.Math" "min" 2 3)) (TOK_SELEXPR (TOK_FUNCTION reflect "java.lang.Math" "round" 2.5)) (TOK_SELEXPR (TOK_FUNCTION reflect "java.lang.Math" "exp" 1.0)) (TOK_SELEXPR (TOK_FUNCTION reflect "java.lang.Math" "floor" 1.9)) (TOK_SELEXPR (TOK_FUNCTION reflect "java.lang.Integer" "valueOf" (TOK_TABLE_OR_COL key) 16))))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_TABLESPLITSAMPLE + TOK_ROWCOUNT + 1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + reflect + "java.lang.String" + "valueOf" + 1 + TOK_SELEXPR + TOK_FUNCTION + reflect + "java.lang.String" + "isEmpty" + TOK_SELEXPR + TOK_FUNCTION + reflect + "java.lang.Math" + "max" + 2 + 3 + TOK_SELEXPR + TOK_FUNCTION + reflect + "java.lang.Math" + "min" + 2 + 3 + TOK_SELEXPR + TOK_FUNCTION + reflect + "java.lang.Math" + "round" + 2.5 + TOK_SELEXPR + TOK_FUNCTION + reflect + "java.lang.Math" + "exp" + 1.0 + TOK_SELEXPR + TOK_FUNCTION + reflect + "java.lang.Math" + "floor" + 1.9 + TOK_SELEXPR + TOK_FUNCTION + reflect + "java.lang.Integer" + "valueOf" + TOK_TABLE_OR_COL + key + 16 + STAGE DEPENDENCIES: Stage-0 is a root stage @@ -47,30 +112,12 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: reflect('java.lang.String','valueOf',1) - type: string - expr: reflect('java.lang.String','isEmpty') - type: string - expr: reflect('java.lang.Math','max',2,3) - type: string - expr: reflect('java.lang.Math','min',2,3) - type: string - expr: reflect('java.lang.Math','round',2.5) - type: string - expr: reflect('java.lang.Math','exp',1.0) - type: string - expr: reflect('java.lang.Math','floor',1.9) - type: string - expr: reflect('java.lang.Integer','valueOf',key,16) - type: string + expressions: reflect('java.lang.String','valueOf',1) (type: string), reflect('java.lang.String','isEmpty') (type: string), reflect('java.lang.Math','max',2,3) (type: string), reflect('java.lang.Math','min',2,3) (type: string), reflect('java.lang.Math','round',2.5) (type: string), reflect('java.lang.Math','exp',1.0) (type: string), reflect('java.lang.Math','floor',1.9) (type: string), reflect('java.lang.Integer','valueOf',key,16) (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: - numRows: 58 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT reflect("java.lang.String", "valueOf", 1), diff --git ql/src/test/results/clientpositive/udf_reflect2.q.out ql/src/test/results/clientpositive/udf_reflect2.q.out index ba0b814..c326c0d 100644 --- ql/src/test/results/clientpositive/udf_reflect2.q.out +++ ql/src/test/results/clientpositive/udf_reflect2.q.out @@ -79,7 +79,233 @@ SELECT key, FROM (select cast(key as int) key, value, cast('2013-02-15 19:41:20' as timestamp) ts from src) a LIMIT 5 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL key)) key) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP '2013-02-15 19:41:20') ts)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL key) "byteValue")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL key) "shortValue")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL key) "intValue")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL key) "longValue")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL key) "floatValue")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL key) "doubleValue")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL key) "toString")) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL value) "concat" "_concat")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL value) "contains" "86")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL value) "startsWith" "v")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL value) "endsWith" "6")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL value) "equals" "val_86")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL value) "equalsIgnoreCase" "VAL_86")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL value) "getBytes")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL value) "indexOf" "1")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL value) "lastIndexOf" "1")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL value) "replace" "val" "VALUE")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL value) "substring" 1)) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL value) "substring" 1 5)) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL value) "toUpperCase")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL value) "trim")) (TOK_SELEXPR (TOK_TABLE_OR_COL ts)) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL ts) "getYear")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL ts) "getMonth")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL ts) "getDay")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL ts) "getHours")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL ts) "getMinutes")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL ts) "getSeconds")) (TOK_SELEXPR (TOK_FUNCTION reflect2 (TOK_TABLE_OR_COL ts) "getTime"))) (TOK_LIMIT 5))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + TOK_INT + TOK_TABLE_OR_COL + key + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_FUNCTION + TOK_TIMESTAMP + '2013-02-15 19:41:20' + ts + a + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + key + "byteValue" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + key + "shortValue" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + key + "intValue" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + key + "longValue" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + key + "floatValue" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + key + "doubleValue" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + key + "toString" + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + value + "concat" + "_concat" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + value + "contains" + "86" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + value + "startsWith" + "v" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + value + "endsWith" + "6" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + value + "equals" + "val_86" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + value + "equalsIgnoreCase" + "VAL_86" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + value + "getBytes" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + value + "indexOf" + "1" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + value + "lastIndexOf" + "1" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + value + "replace" + "val" + "VALUE" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + value + "substring" + 1 + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + value + "substring" + 1 + 5 + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + value + "toUpperCase" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + value + "trim" + TOK_SELEXPR + TOK_TABLE_OR_COL + ts + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + ts + "getYear" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + ts + "getMonth" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + ts + "getDay" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + ts + "getHours" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + ts + "getMinutes" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + ts + "getSeconds" + TOK_SELEXPR + TOK_FUNCTION + reflect2 + TOK_TABLE_OR_COL + ts + "getTime" + TOK_LIMIT + 5 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -88,101 +314,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: - expr: UDFToInteger(key) - type: int - expr: value - type: string - expr: CAST( '2013-02-15 19:41:20' AS TIMESTAMP) - type: timestamp + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( '2013-02-15 19:41:20' AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: reflect2(_col0,'byteValue') - type: tinyint - expr: reflect2(_col0,'shortValue') - type: smallint - expr: reflect2(_col0,'intValue') - type: int - expr: reflect2(_col0,'longValue') - type: bigint - expr: reflect2(_col0,'floatValue') - type: float - expr: reflect2(_col0,'doubleValue') - type: double - expr: reflect2(_col0,'toString') - type: string - expr: _col1 - type: string - expr: reflect2(_col1,'concat','_concat') - type: string - expr: reflect2(_col1,'contains','86') - type: boolean - expr: reflect2(_col1,'startsWith','v') - type: boolean - expr: reflect2(_col1,'endsWith','6') - type: boolean - expr: reflect2(_col1,'equals','val_86') - type: boolean - expr: reflect2(_col1,'equalsIgnoreCase','VAL_86') - type: boolean - expr: reflect2(_col1,'getBytes') - type: binary - expr: reflect2(_col1,'indexOf','1') - type: int - expr: reflect2(_col1,'lastIndexOf','1') - type: int - expr: reflect2(_col1,'replace','val','VALUE') - type: string - expr: reflect2(_col1,'substring',1) - type: string - expr: reflect2(_col1,'substring',1,5) - type: string - expr: reflect2(_col1,'toUpperCase') - type: string - expr: reflect2(_col1,'trim') - type: string - expr: _col2 - type: timestamp - expr: reflect2(_col2,'getYear') - type: int - expr: reflect2(_col2,'getMonth') - type: int - expr: reflect2(_col2,'getDay') - type: int - expr: reflect2(_col2,'getHours') - type: int - expr: reflect2(_col2,'getMinutes') - type: int - expr: reflect2(_col2,'getSeconds') - type: int - expr: reflect2(_col2,'getTime') - type: bigint + expressions: _col0 (type: int), reflect2(_col0,'byteValue') (type: tinyint), reflect2(_col0,'shortValue') (type: smallint), reflect2(_col0,'intValue') (type: int), reflect2(_col0,'longValue') (type: bigint), reflect2(_col0,'floatValue') (type: float), reflect2(_col0,'doubleValue') (type: double), reflect2(_col0,'toString') (type: string), _col1 (type: string), reflect2(_col1,'concat','_concat') (type: string), reflect2(_col1,'contains','86') (type: boolean), reflect2(_col1,'startsWith','v') (type: boolean), reflect2(_col1,'endsWith','6') (type: boolean), reflect2(_col1,'equals','val_86') (type: boolean), reflect2(_col1,'equalsIgnoreCase','VAL_86') (type: boolean), reflect2(_col1,'getBytes') (type: binary), reflect2(_col1,'indexOf','1') (type: int), reflect2(_col1,'lastIndexOf','1') (type: int), reflect2(_col1,'replace','val','VALUE') (type: string), reflect2(_col1,'substring',1) (type: string), reflect2(_col1,'substring',1,5) (type: string), reflect2(_col1,'toUpperCase') (type: string), reflect2(_col1,'trim') (type: string), _col2 (type: timestamp), reflect2(_col2,'getYear') (type: int), reflect2(_col2,'getMonth') (type: int), reflect2(_col2,'getDay') (type: int), reflect2(_col2,'getHours') (type: int), reflect2(_col2,'getMinutes') (type: int), reflect2(_col2,'getSeconds') (type: int), reflect2(_col2,'getTime') (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 - Statistics: - numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit - Statistics: - numRows: 5 dataSize: 1000 basicStatsState: COMPLETE colStatsState: NONE + Number of rows: 5 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 5 dataSize: 1000 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/udf_repeat.q.out ql/src/test/results/clientpositive/udf_repeat.q.out index 06303e7..07b09e9 100644 --- ql/src/test/results/clientpositive/udf_repeat.q.out +++ ql/src/test/results/clientpositive/udf_repeat.q.out @@ -25,9 +25,6 @@ POSTHOOK: query: EXPLAIN SELECT repeat("asdf", -1) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION repeat "Facebook" 3)) (TOK_SELEXPR (TOK_FUNCTION repeat "" 4)) (TOK_SELEXPR (TOK_FUNCTION repeat "asd" 0)) (TOK_SELEXPR (TOK_FUNCTION repeat "asdf" (- 1)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -39,17 +36,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: repeat('Facebook', 3) - type: string - expr: repeat('', 4) - type: string - expr: repeat('asd', 0) - type: string - expr: repeat('asdf', (- 1)) - type: string + expressions: repeat('Facebook', 3) (type: string), repeat('', 4) (type: string), repeat('asd', 0) (type: string), repeat('asdf', (- 1)) (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT diff --git ql/src/test/results/clientpositive/udf_reverse.q.out ql/src/test/results/clientpositive/udf_reverse.q.out index a5ba453..4ca3f68 100644 --- ql/src/test/results/clientpositive/udf_reverse.q.out +++ ql/src/test/results/clientpositive/udf_reverse.q.out @@ -20,9 +20,6 @@ PREHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT reverse(sr PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT reverse(src1.value) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION reverse (. (TOK_TABLE_OR_COL src1) value)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -36,18 +33,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src1 + Map Operator Tree: TableScan alias: src1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: reverse(value) - type: string + expressions: reverse(value) (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -78,12 +74,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -92,12 +86,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf_rpad.q.out ql/src/test/results/clientpositive/udf_rpad.q.out index 352850c..287f5a9 100644 --- ql/src/test/results/clientpositive/udf_rpad.q.out +++ ql/src/test/results/clientpositive/udf_rpad.q.out @@ -25,9 +25,6 @@ POSTHOOK: query: EXPLAIN SELECT rpad('hi', 6, '123') FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION rpad 'hi' 1 '?')) (TOK_SELEXPR (TOK_FUNCTION rpad 'hi' 5 '.')) (TOK_SELEXPR (TOK_FUNCTION rpad 'hi' 6 '123'))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -39,15 +36,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: rpad('hi', 1, '?') - type: string - expr: rpad('hi', 5, '.') - type: string - expr: rpad('hi', 6, '123') - type: string + expressions: rpad('hi', 1, '?') (type: string), rpad('hi', 5, '.') (type: string), rpad('hi', 6, '123') (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT diff --git ql/src/test/results/clientpositive/udf_second.q.out ql/src/test/results/clientpositive/udf_second.q.out index 100c04f..4abfae1 100644 --- ql/src/test/results/clientpositive/udf_second.q.out +++ ql/src/test/results/clientpositive/udf_second.q.out @@ -22,9 +22,6 @@ POSTHOOK: query: EXPLAIN SELECT second('2009-08-07 13:14:15'), second('13:14:15'), second('2009-08-07') FROM src WHERE key = 86 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION second '2009-08-07 13:14:15')) (TOK_SELEXPR (TOK_FUNCTION second '13:14:15')) (TOK_SELEXPR (TOK_FUNCTION second '2009-08-07'))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -35,19 +32,14 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: second('2009-08-07 13:14:15') - type: int - expr: second('13:14:15') - type: int - expr: second('2009-08-07') - type: int + expressions: second('2009-08-07 13:14:15') (type: int), second('13:14:15') (type: int), second('2009-08-07') (type: int) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT second('2009-08-07 13:14:15'), second('13:14:15'), second('2009-08-07') diff --git ql/src/test/results/clientpositive/udf_sign.q.out ql/src/test/results/clientpositive/udf_sign.q.out index b973aaf..f0851a9 100644 --- ql/src/test/results/clientpositive/udf_sign.q.out +++ ql/src/test/results/clientpositive/udf_sign.q.out @@ -4,9 +4,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select sign(0) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sign 0))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -18,11 +15,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: sign(0) - type: double + expressions: sign(0) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: select sign(0) FROM src tablesample (1 rows) @@ -71,9 +68,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select sign(0) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sign 0))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -85,11 +79,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: sign(0) - type: double + expressions: sign(0) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: select sign(0) FROM src tablesample (1 rows) diff --git ql/src/test/results/clientpositive/udf_size.q.out ql/src/test/results/clientpositive/udf_size.q.out index 22ca0c0..936cf0c 100644 --- ql/src/test/results/clientpositive/udf_size.q.out +++ ql/src/test/results/clientpositive/udf_size.q.out @@ -26,9 +26,6 @@ SELECT size(src_thrift.lint), WHERE src_thrift.lint IS NOT NULL AND NOT (src_thrift.mstringstring IS NULL) LIMIT 1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION size (. (TOK_TABLE_OR_COL src_thrift) lint))) (TOK_SELEXPR (TOK_FUNCTION size (. (TOK_TABLE_OR_COL src_thrift) lintstring))) (TOK_SELEXPR (TOK_FUNCTION size (. (TOK_TABLE_OR_COL src_thrift) mstringstring))) (TOK_SELEXPR (TOK_FUNCTION size TOK_NULL))) (TOK_WHERE (AND (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL src_thrift) lint)) (NOT (TOK_FUNCTION TOK_ISNULL (. (TOK_TABLE_OR_COL src_thrift) mstringstring))))) (TOK_LIMIT 1))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -39,22 +36,17 @@ STAGE PLANS: Processor Tree: TableScan alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: - expr: (lint is not null and (not mstringstring is null)) - type: boolean + predicate: (lint is not null and (not mstringstring is null)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: size(lint) - type: int - expr: size(lintstring) - type: int - expr: size(mstringstring) - type: int - expr: size(null) - type: int + expressions: size(lint) (type: int), size(lintstring) (type: int), size(mstringstring) (type: int), size(null) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE ListSink PREHOOK: query: FROM src_thrift diff --git ql/src/test/results/clientpositive/udf_sort_array.q.out ql/src/test/results/clientpositive/udf_sort_array.q.out index 8eec2c2..536c77d 100644 --- ql/src/test/results/clientpositive/udf_sort_array.q.out +++ ql/src/test/results/clientpositive/udf_sort_array.q.out @@ -27,9 +27,6 @@ POSTHOOK: query: -- Evaluate function against STRING valued keys EXPLAIN SELECT sort_array(array("b", "d", "c", "a")) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sort_array (TOK_FUNCTION array "b" "d" "c" "a")))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -41,11 +38,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: sort_array(array('b','d','c','a')) - type: array + expressions: sort_array(array('b','d','c','a')) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT sort_array(array("f", "a", "g", "c", "b", "d", "e")) FROM src tablesample (1 rows) diff --git ql/src/test/results/clientpositive/udf_space.q.out ql/src/test/results/clientpositive/udf_space.q.out index 781ef92..fd8c2ca 100644 --- ql/src/test/results/clientpositive/udf_space.q.out +++ ql/src/test/results/clientpositive/udf_space.q.out @@ -27,9 +27,6 @@ POSTHOOK: query: EXPLAIN SELECT space(-100) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION space 10)) (TOK_SELEXPR (TOK_FUNCTION space 0)) (TOK_SELEXPR (TOK_FUNCTION space 1)) (TOK_SELEXPR (TOK_FUNCTION space (- 1))) (TOK_SELEXPR (TOK_FUNCTION space (- 100)))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -41,19 +38,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: space(10) - type: string - expr: space(0) - type: string - expr: space(1) - type: string - expr: space((- 1)) - type: string - expr: space((- 100)) - type: string + expressions: space(10) (type: string), space(0) (type: string), space(1) (type: string), space((- 1)) (type: string), space((- 100)) (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT diff --git ql/src/test/results/clientpositive/udf_split.q.out ql/src/test/results/clientpositive/udf_split.q.out index 8169068..df2afa4 100644 --- ql/src/test/results/clientpositive/udf_split.q.out +++ ql/src/test/results/clientpositive/udf_split.q.out @@ -25,9 +25,6 @@ POSTHOOK: query: EXPLAIN SELECT split(50401020, 0) FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION split 'a b c' ' ')) (TOK_SELEXPR (TOK_FUNCTION split 'oneAtwoBthreeC' '[ABC]')) (TOK_SELEXPR (TOK_FUNCTION split '' '.')) (TOK_SELEXPR (TOK_FUNCTION split 50401020 0))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -39,17 +36,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: split('a b c', ' ') - type: array - expr: split('oneAtwoBthreeC', '[ABC]') - type: array - expr: split('', '.') - type: array - expr: split(50401020, 0) - type: array + expressions: split('a b c', ' ') (type: array), split('oneAtwoBthreeC', '[ABC]') (type: array), split('', '.') (type: array), split(50401020, 0) (type: array) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT diff --git ql/src/test/results/clientpositive/udf_struct.q.out ql/src/test/results/clientpositive/udf_struct.q.out index 3f6daf0..8bc8b51 100644 --- ql/src/test/results/clientpositive/udf_struct.q.out +++ ql/src/test/results/clientpositive/udf_struct.q.out @@ -16,9 +16,6 @@ POSTHOOK: query: EXPLAIN SELECT struct(1), struct(1, "a"), struct(1, "b", 1.5).col1, struct(1, struct("a", 1.5)).col2.col1 FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION struct 1)) (TOK_SELEXPR (TOK_FUNCTION struct 1 "a")) (TOK_SELEXPR (. (TOK_FUNCTION struct 1 "b" 1.5) col1)) (TOK_SELEXPR (. (. (TOK_FUNCTION struct 1 (TOK_FUNCTION struct "a" 1.5)) col2) col1))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -30,17 +27,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: struct(1) - type: struct - expr: struct(1,'a') - type: struct - expr: struct(1,'b',1.5).col1 - type: int - expr: struct(1,struct('a',1.5)).col2.col1 - type: string + expressions: struct(1) (type: struct), struct(1,'a') (type: struct), struct(1,'b',1.5).col1 (type: int), struct(1,struct('a',1.5)).col2.col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT struct(1), struct(1, "a"), struct(1, "b", 1.5).col1, struct(1, struct("a", 1.5)).col2.col1 diff --git ql/src/test/results/clientpositive/udf_testlength.q.out ql/src/test/results/clientpositive/udf_testlength.q.out index b14f189..4d75482 100644 --- ql/src/test/results/clientpositive/udf_testlength.q.out +++ ql/src/test/results/clientpositive/udf_testlength.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATEFUNCTION POSTHOOK: query: EXPLAIN CREATE TEMPORARY FUNCTION testlength AS 'org.apache.hadoop.hive.ql.udf.UDFTestLength' POSTHOOK: type: CREATEFUNCTION -ABSTRACT SYNTAX TREE: - (TOK_CREATEFUNCTION testlength 'org.apache.hadoop.hive.ql.udf.UDFTestLength') - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/udf_testlength2.q.out ql/src/test/results/clientpositive/udf_testlength2.q.out index 2bcb1f7..8a1e03e 100644 --- ql/src/test/results/clientpositive/udf_testlength2.q.out +++ ql/src/test/results/clientpositive/udf_testlength2.q.out @@ -4,9 +4,6 @@ PREHOOK: type: CREATEFUNCTION POSTHOOK: query: EXPLAIN CREATE TEMPORARY FUNCTION testlength2 AS 'org.apache.hadoop.hive.ql.udf.UDFTestLength2' POSTHOOK: type: CREATEFUNCTION -ABSTRACT SYNTAX TREE: - (TOK_CREATEFUNCTION testlength2 'org.apache.hadoop.hive.ql.udf.UDFTestLength2') - STAGE DEPENDENCIES: Stage-0 is a root stage diff --git ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out index cb2e92b..8b5f6f2 100644 --- ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out +++ ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out @@ -86,9 +86,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- PPD explain select * from (select * from src) a where unix_timestamp(a.key) > 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_FUNCTION unix_timestamp (. (TOK_TABLE_OR_COL a) key)) 10)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -96,31 +93,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (unix_timestamp(_col0) > 10) - type: boolean + predicate: (unix_timestamp(_col0) > 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -134,9 +124,6 @@ PREHOOK: query: explain select * from (select * from src) a where to_unix_timest PREHOOK: type: QUERY POSTHOOK: query: explain select * from (select * from src) a where to_unix_timestamp(a.key) > 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_FUNCTION to_unix_timestamp (. (TOK_TABLE_OR_COL a) key)) 10)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -144,24 +131,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (to_unix_timestamp(key) > 10) - type: boolean + predicate: (to_unix_timestamp(key) > 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf_union.q.out ql/src/test/results/clientpositive/udf_union.q.out index 31e8196..4630b54 100644 --- ql/src/test/results/clientpositive/udf_union.q.out +++ ql/src/test/results/clientpositive/udf_union.q.out @@ -21,9 +21,6 @@ SELECT create_union(0, key), create_union(if(key<100, 0, 1), 2.0, value), create_union(1, "a", struct(2, "b")) FROM src tablesample (2 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION create_union 0 (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION create_union (TOK_FUNCTION if (< (TOK_TABLE_OR_COL key) 100) 0 1) 2.0 (TOK_TABLE_OR_COL value))) (TOK_SELEXPR (TOK_FUNCTION create_union 1 "a" (TOK_FUNCTION struct 2 "b")))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -35,15 +32,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: create_union(0,key) - type: uniontype - expr: create_union(if((key < 100), 0, 1),2.0,value) - type: uniontype - expr: create_union(1,'a',struct(2,'b')) - type: uniontype> + expressions: create_union(0,key) (type: uniontype), create_union(if((key < 100), 0, 1),2.0,value) (type: uniontype), create_union(1,'a',struct(2,'b')) (type: uniontype>) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT create_union(0, key), create_union(if(key<100, 0, 1), 2.0, value), diff --git ql/src/test/results/clientpositive/udf_when.q.out ql/src/test/results/clientpositive/udf_when.q.out index 7823c1b..42736f4 100644 --- ql/src/test/results/clientpositive/udf_when.q.out +++ ql/src/test/results/clientpositive/udf_when.q.out @@ -64,9 +64,6 @@ SELECT CASE END FROM src tablesample (1 rows) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION WHEN (= 1 1) 2 (= 1 3) 4 5)) (TOK_SELEXPR (TOK_FUNCTION WHEN (= 6 7) 8 9)) (TOK_SELEXPR (TOK_FUNCTION WHEN (= 10 11) 12 (= 13 13) 14)) (TOK_SELEXPR (TOK_FUNCTION WHEN (= 15 16) 17 (= 18 19) 20)) (TOK_SELEXPR (TOK_FUNCTION WHEN (= 21 22) TOK_NULL (= 23 23) 24)) (TOK_SELEXPR (TOK_FUNCTION WHEN (= 25 26) 27 (= 28 28) TOK_NULL))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -78,21 +75,11 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: CASE WHEN ((1 = 1)) THEN (2) WHEN ((1 = 3)) THEN (4) ELSE (5) END - type: int - expr: CASE WHEN ((6 = 7)) THEN (8) ELSE (9) END - type: int - expr: CASE WHEN ((10 = 11)) THEN (12) WHEN ((13 = 13)) THEN (14) END - type: int - expr: CASE WHEN ((15 = 16)) THEN (17) WHEN ((18 = 19)) THEN (20) END - type: int - expr: CASE WHEN ((21 = 22)) THEN (null) WHEN ((23 = 23)) THEN (24) END - type: int - expr: CASE WHEN ((25 = 26)) THEN (27) WHEN ((28 = 28)) THEN (null) END - type: int + expressions: CASE WHEN ((1 = 1)) THEN (2) WHEN ((1 = 3)) THEN (4) ELSE (5) END (type: int), CASE WHEN ((6 = 7)) THEN (8) ELSE (9) END (type: int), CASE WHEN ((10 = 11)) THEN (12) WHEN ((13 = 13)) THEN (14) END (type: int), CASE WHEN ((15 = 16)) THEN (17) WHEN ((18 = 19)) THEN (20) END (type: int), CASE WHEN ((21 = 22)) THEN (null) WHEN ((23 = 23)) THEN (24) END (type: int), CASE WHEN ((25 = 26)) THEN (27) WHEN ((28 = 28)) THEN (null) END (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE ListSink PREHOOK: query: SELECT CASE diff --git ql/src/test/results/clientpositive/udtf_explode.q.out ql/src/test/results/clientpositive/udtf_explode.q.out index d7d004a..5d29f18 100644 --- ql/src/test/results/clientpositive/udtf_explode.q.out +++ ql/src/test/results/clientpositive/udtf_explode.q.out @@ -13,7 +13,29 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol)) (TOK_LIMIT 3))) + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + explode + TOK_FUNCTION + array + 1 + 2 + 3 + myCol + TOK_LIMIT + 3 + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -22,34 +44,27 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: array(1,2,3) - type: array + expressions: array(1,2,3) (type: array) outputColumnNames: _col0 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Limit - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Number of rows: 3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -122,7 +137,52 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol)) (TOK_LIMIT 3))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) myCol)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) myCol)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + explode + TOK_FUNCTION + array + 1 + 2 + 3 + myCol + TOK_LIMIT + 3 + a + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + myCol + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + . + TOK_TABLE_OR_COL + a + myCol + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -132,35 +192,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: array(1,2,3) - type: array + expressions: array(1,2,3) (type: array) outputColumnNames: _col0 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Limit - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Number of rows: 3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE tag: -1 - value expressions: - expr: col - type: int + value expressions: col (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -211,29 +262,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Number of rows: 3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: int + aggregations: count(1) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 @@ -254,24 +296,16 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE tag: -1 - value expressions: - expr: _col1 - type: bigint + value expressions: _col1 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -300,32 +334,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint + expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -383,9 +406,6 @@ PREHOOK: query: EXPLAIN SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION map 1 'one' 2 'two' 3 'three')) myKey myVal)) (TOK_LIMIT 3))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -393,21 +413,23 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: map(1:'one',2:'two',3:'three') - type: map + expressions: map(1:'one',2:'two',3:'three') (type: map) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Limit + Number of rows: 3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -422,7 +444,65 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION map 1 'one' 2 'two' 3 'three')) myKey myVal)) (TOK_LIMIT 3))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) myKey)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) myVal)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) myKey) (. (TOK_TABLE_OR_COL a) myVal)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + explode + TOK_FUNCTION + map + 1 + 'one' + 2 + 'two' + 3 + 'three' + myKey + myVal + TOK_LIMIT + 3 + a + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + myKey + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + myVal + TOK_SELEXPR + TOK_FUNCTION + count + 1 + TOK_GROUPBY + . + TOK_TABLE_OR_COL + a + myKey + . + TOK_TABLE_OR_COL + a + myVal + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -432,37 +512,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a:src + Map Operator Tree: TableScan alias: src - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE GatherStats: false Select Operator - expressions: - expr: map(1:'one',2:'two',3:'three') - type: map + expressions: map(1:'one',2:'two',3:'three') (type: map) outputColumnNames: _col0 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: explode Limit - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Number of rows: 3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE tag: -1 - value expressions: - expr: key - type: int - expr: value - type: string + value expressions: key (type: int), value (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -513,33 +582,20 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Limit - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Number of rows: 3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: int - expr: _col1 - type: string + aggregations: count(1) + keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 @@ -560,28 +616,16 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: string + key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: int - expr: _col1 - type: string - Statistics: - numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE tag: -1 - value expressions: - expr: _col2 - type: bigint + value expressions: _col2 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -610,36 +654,21 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/udtf_json_tuple.q.out ql/src/test/results/clientpositive/udtf_json_tuple.q.out index 1a480b6..384fcc0 100644 --- ql/src/test/results/clientpositive/udtf_json_tuple.q.out +++ ql/src/test/results/clientpositive/udtf_json_tuple.q.out @@ -47,9 +47,6 @@ select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', ' POSTHOOK: type: QUERY POSTHOOK: Lineage: json_t.jstring EXPRESSION [] POSTHOOK: Lineage: json_t.key EXPRESSION [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5 (TOK_TABALIAS b))) (TOK_TABREF (TOK_TABNAME json_t) a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -57,110 +54,53 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col8 - type: string + expressions: _col0 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Select Operator - expressions: - expr: jstring - type: string - expr: 'f1' - type: string - expr: 'f2' - type: string - expr: 'f3' - type: string - expr: 'f4' - type: string - expr: 'f5' - type: string + expressions: jstring (type: string), 'f1' (type: string), 'f2' (type: string), 'f3' (type: string), 'f4' (type: string), 'f5' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE function name: json_tuple Lateral View Join Operator outputColumnNames: _col0, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col8 - type: string + expressions: _col0 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -194,9 +134,6 @@ select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f POSTHOOK: type: QUERY POSTHOOK: Lineage: json_t.jstring EXPRESSION [] POSTHOOK: Lineage: json_t.key EXPRESSION [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME json_t) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL f1)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL f2)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL f3))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -204,53 +141,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: jstring - type: string - expr: 'f1' - type: string - expr: 'f2' - type: string - expr: 'f3' - type: string - expr: 'f4' - type: string - expr: 'f5' - type: string + expressions: jstring (type: string), 'f1' (type: string), 'f2' (type: string), 'f3' (type: string), 'f4' (type: string), 'f5' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE function name: json_tuple Reduce Output Operator - key expressions: - expr: c0 - type: string - expr: c1 - type: string - expr: c2 - type: string + key expressions: c0 (type: string), c1 (type: string), c2 (type: string) sort order: +++ - tag: -1 - value expressions: - expr: c0 - type: string - expr: c1 - type: string - expr: c2 - type: string - expr: c3 - type: string - expr: c4 - type: string + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE + value expressions: c0 (type: string), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -284,9 +196,6 @@ select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', POSTHOOK: type: QUERY POSTHOOK: Lineage: json_t.jstring EXPRESSION [] POSTHOOK: Lineage: json_t.key EXPRESSION [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5 (TOK_TABALIAS b))) (TOK_TABREF (TOK_TABNAME json_t) a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) f2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) f5))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -294,86 +203,53 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - expr: _col8 - type: string + expressions: _col0 (type: string), _col5 (type: string), _col8 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Select Operator - expressions: - expr: jstring - type: string - expr: 'f1' - type: string - expr: 'f2' - type: string - expr: 'f3' - type: string - expr: 'f4' - type: string - expr: 'f5' - type: string + expressions: jstring (type: string), 'f1' (type: string), 'f2' (type: string), 'f3' (type: string), 'f4' (type: string), 'f5' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE function name: json_tuple Lateral View Join Operator outputColumnNames: _col0, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - expr: _col8 - type: string + expressions: _col0 (type: string), _col5 (type: string), _col8 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -407,9 +283,6 @@ select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', POSTHOOK: type: QUERY POSTHOOK: Lineage: json_t.jstring EXPRESSION [] POSTHOOK: Lineage: json_t.key EXPRESSION [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5 (TOK_TABALIAS b))) (TOK_TABREF (TOK_TABNAME json_t) a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL f2)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL f1))) (TOK_GROUPBY (TOK_TABLE_OR_COL f2)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL f2))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -418,113 +291,78 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: _col4 is not null - type: boolean + predicate: _col4 is not null (type: boolean) + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col5 - type: string + expressions: _col5 (type: string) outputColumnNames: _col5 + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col5 - type: string + aggregations: count() + keys: _col5 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Select Operator - expressions: - expr: jstring - type: string - expr: 'f1' - type: string - expr: 'f2' - type: string - expr: 'f3' - type: string - expr: 'f4' - type: string - expr: 'f5' - type: string + expressions: jstring (type: string), 'f1' (type: string), 'f2' (type: string), 'f3' (type: string), 'f4' (type: string), 'f5' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE function name: json_tuple Lateral View Join Operator outputColumnNames: _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 12 Data size: 472 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: _col4 is not null - type: boolean + predicate: _col4 is not null (type: boolean) + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col5 - type: string + expressions: _col5 (type: string) outputColumnNames: _col5 + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col5 - type: string + aggregations: count() + keys: _col5 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 236 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 118 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 118 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -532,25 +370,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + Statistics: Num rows: 3 Data size: 118 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 118 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 118 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out index a38b31b..8373d8e 100644 --- ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out +++ ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out @@ -68,9 +68,6 @@ select a.key, b.* from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', ' POSTHOOK: type: QUERY POSTHOOK: Lineage: url_t.fullurl EXPRESSION [] POSTHOOK: Lineage: url_t.key EXPRESSION [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION parse_url_tuple (. (TOK_TABLE_OR_COL a) fullurl) 'HOST' 'PATH' 'QUERY' 'REF' 'PROTOCOL' 'FILE' 'AUTHORITY' 'USERINFO' 'QUERY:k1') ho pa qu re pr fi au us qk1 (TOK_TABALIAS b))) (TOK_TABREF (TOK_TABNAME url_t) a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -78,150 +75,53 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col10 - type: string - expr: _col11 - type: string - expr: _col12 - type: string + expressions: _col0 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) Select Operator - expressions: - expr: fullurl - type: string - expr: 'HOST' - type: string - expr: 'PATH' - type: string - expr: 'QUERY' - type: string - expr: 'REF' - type: string - expr: 'PROTOCOL' - type: string - expr: 'FILE' - type: string - expr: 'AUTHORITY' - type: string - expr: 'USERINFO' - type: string - expr: 'QUERY:k1' - type: string + expressions: fullurl (type: string), 'HOST' (type: string), 'PATH' (type: string), 'QUERY' (type: string), 'REF' (type: string), 'PROTOCOL' (type: string), 'FILE' (type: string), 'AUTHORITY' (type: string), 'USERINFO' (type: string), 'QUERY:k1' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE function name: parse_url_tuple Lateral View Join Operator outputColumnNames: _col0, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - expr: _col10 - type: string - expr: _col11 - type: string - expr: _col12 - type: string + expressions: _col0 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - expr: _col7 - type: string - expr: _col8 - type: string - expr: _col9 - type: string + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -255,9 +155,6 @@ select parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'F POSTHOOK: type: QUERY POSTHOOK: Lineage: url_t.fullurl EXPRESSION [] POSTHOOK: Lineage: url_t.key EXPRESSION [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME url_t) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION parse_url_tuple (. (TOK_TABLE_OR_COL a) fullurl) 'HOST' 'PATH' 'QUERY' 'REF' 'PROTOCOL' 'FILE' 'AUTHORITY' 'USERINFO' 'QUERY:k1') ho pa qu re pr fi au us qk1)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ho)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL pa)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL qu))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -265,69 +162,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: fullurl - type: string - expr: 'HOST' - type: string - expr: 'PATH' - type: string - expr: 'QUERY' - type: string - expr: 'REF' - type: string - expr: 'PROTOCOL' - type: string - expr: 'FILE' - type: string - expr: 'AUTHORITY' - type: string - expr: 'USERINFO' - type: string - expr: 'QUERY:k1' - type: string + expressions: fullurl (type: string), 'HOST' (type: string), 'PATH' (type: string), 'QUERY' (type: string), 'REF' (type: string), 'PROTOCOL' (type: string), 'FILE' (type: string), 'AUTHORITY' (type: string), 'USERINFO' (type: string), 'QUERY:k1' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE function name: parse_url_tuple Reduce Output Operator - key expressions: - expr: c0 - type: string - expr: c1 - type: string - expr: c2 - type: string + key expressions: c0 (type: string), c1 (type: string), c2 (type: string) sort order: +++ - tag: -1 - value expressions: - expr: c0 - type: string - expr: c1 - type: string - expr: c2 - type: string - expr: c3 - type: string - expr: c4 - type: string - expr: c5 - type: string - expr: c6 - type: string - expr: c7 - type: string - expr: c8 - type: string + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE + value expressions: c0 (type: string), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string), c5 (type: string), c6 (type: string), c7 (type: string), c8 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -363,9 +219,6 @@ select a.key, b.ho, b.qu, b.qk1, b.err1, b.err2, b.err3 from url_t a lateral vie POSTHOOK: type: QUERY POSTHOOK: Lineage: url_t.fullurl EXPRESSION [] POSTHOOK: Lineage: url_t.key EXPRESSION [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION parse_url_tuple (. (TOK_TABLE_OR_COL a) fullurl) 'HOST' 'PATH' 'QUERY' 'REF' 'PROTOCOL' 'FILE' 'AUTHORITY' 'USERINFO' 'QUERY:k1' 'host' 'query' 'QUERY:nonExistCol') ho pa qu re pr fi au us qk1 err1 err2 err3 (TOK_TABALIAS b))) (TOK_TABREF (TOK_TABNAME url_t) a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) ho)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) qu)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) qk1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) err1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) err2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) err3))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -373,132 +226,53 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string - expr: _col6 - type: string - expr: _col12 - type: string - expr: _col13 - type: string - expr: _col14 - type: string - expr: _col15 - type: string + expressions: _col0 (type: string), _col4 (type: string), _col6 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: string + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) Select Operator - expressions: - expr: fullurl - type: string - expr: 'HOST' - type: string - expr: 'PATH' - type: string - expr: 'QUERY' - type: string - expr: 'REF' - type: string - expr: 'PROTOCOL' - type: string - expr: 'FILE' - type: string - expr: 'AUTHORITY' - type: string - expr: 'USERINFO' - type: string - expr: 'QUERY:k1' - type: string - expr: 'host' - type: string - expr: 'query' - type: string - expr: 'QUERY:nonExistCol' - type: string + expressions: fullurl (type: string), 'HOST' (type: string), 'PATH' (type: string), 'QUERY' (type: string), 'REF' (type: string), 'PROTOCOL' (type: string), 'FILE' (type: string), 'AUTHORITY' (type: string), 'USERINFO' (type: string), 'QUERY:k1' (type: string), 'host' (type: string), 'query' (type: string), 'QUERY:nonExistCol' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE function name: parse_url_tuple Lateral View Join Operator outputColumnNames: _col0, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col4 - type: string - expr: _col6 - type: string - expr: _col12 - type: string - expr: _col13 - type: string - expr: _col14 - type: string - expr: _col15 - type: string + expressions: _col0 (type: string), _col4 (type: string), _col6 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col6 - type: string + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -532,9 +306,6 @@ select ho, count(*) from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', POSTHOOK: type: QUERY POSTHOOK: Lineage: url_t.fullurl EXPRESSION [] POSTHOOK: Lineage: url_t.key EXPRESSION [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION parse_url_tuple (. (TOK_TABLE_OR_COL a) fullurl) 'HOST' 'PATH' 'QUERY' 'REF' 'PROTOCOL' 'FILE' 'AUTHORITY' 'USERINFO' 'QUERY:k1') ho pa qu re pr fi au us qk1 (TOK_TABALIAS b))) (TOK_TABREF (TOK_TABNAME url_t) a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ho)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL qk1))) (TOK_GROUPBY (TOK_TABLE_OR_COL ho)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -542,121 +313,79 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: _col12 is not null - type: boolean + predicate: _col12 is not null (type: boolean) + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: string + expressions: _col4 (type: string) outputColumnNames: _col4 + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col4 - type: string + aggregations: count() + keys: _col4 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Select Operator - expressions: - expr: fullurl - type: string - expr: 'HOST' - type: string - expr: 'PATH' - type: string - expr: 'QUERY' - type: string - expr: 'REF' - type: string - expr: 'PROTOCOL' - type: string - expr: 'FILE' - type: string - expr: 'AUTHORITY' - type: string - expr: 'USERINFO' - type: string - expr: 'QUERY:k1' - type: string + expressions: fullurl (type: string), 'HOST' (type: string), 'PATH' (type: string), 'QUERY' (type: string), 'REF' (type: string), 'PROTOCOL' (type: string), 'FILE' (type: string), 'AUTHORITY' (type: string), 'USERINFO' (type: string), 'QUERY:k1' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE function name: parse_url_tuple Lateral View Join Operator outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 12 Data size: 426 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: _col12 is not null - type: boolean + predicate: _col12 is not null (type: boolean) + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col4 - type: string + expressions: _col4 (type: string) outputColumnNames: _col4 + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col4 - type: string + aggregations: count() + keys: _col4 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 213 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 106 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 106 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 106 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udtf_stack.q.out ql/src/test/results/clientpositive/udtf_stack.q.out index 0daa8a0..1ce8a89 100644 --- ql/src/test/results/clientpositive/udtf_stack.q.out +++ ql/src/test/results/clientpositive/udtf_stack.q.out @@ -7,9 +7,6 @@ PREHOOK: query: EXPLAIN SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z') a AS x, y LIMIT 2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION STACK 2 'x' (TOK_FUNCTION array 1) 'z') x y (TOK_TABALIAS a))) (TOK_TABREF (TOK_TABNAME src)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x)) (TOK_SELEXPR (TOK_TABLE_OR_COL y))) (TOK_LIMIT 2))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -17,55 +14,51 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Forward + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4, _col5 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: string - expr: _col5 - type: array + expressions: _col4 (type: string), _col5 (type: array) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: 2 - type: int - expr: 'x' - type: string - expr: array(1) - type: array - expr: 'z' - type: string + expressions: 2 (type: int), 'x' (type: string), array(1) (type: array), 'z' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: stack Lateral View Join Operator outputColumnNames: _col4, _col5 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: string - expr: _col5 - type: array + expressions: _col4 (type: string), _col5 (type: array) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -79,9 +72,6 @@ PREHOOK: query: EXPLAIN SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z', array(4)) a AS x, y LIMIT 2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION STACK 2 'x' (TOK_FUNCTION array 1) 'z' (TOK_FUNCTION array 4)) x y (TOK_TABALIAS a))) (TOK_TABREF (TOK_TABNAME src)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x)) (TOK_SELEXPR (TOK_TABLE_OR_COL y))) (TOK_LIMIT 2))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -89,57 +79,51 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Forward + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4, _col5 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: string - expr: _col5 - type: array + expressions: _col4 (type: string), _col5 (type: array) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: - expr: 2 - type: int - expr: 'x' - type: string - expr: array(1) - type: array - expr: 'z' - type: string - expr: array(4) - type: array + expressions: 2 (type: int), 'x' (type: string), array(1) (type: array), 'z' (type: string), array(4) (type: array) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE UDTF Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE function name: stack Lateral View Join Operator outputColumnNames: _col4, _col5 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: _col4 - type: string - expr: _col5 - type: array + expressions: _col4 (type: string), _col5 (type: array) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE Limit + Number of rows: 2 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 11624 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union.q.out ql/src/test/results/clientpositive/union.q.out index bce7311..6627ad7 100644 --- ql/src/test/results/clientpositive/union.q.out +++ ql/src/test/results/clientpositive/union.q.out @@ -18,9 +18,6 @@ FROM ( ) unioninput INSERT OVERWRITE DIRECTORY 'target/warehouse/union.out' SELECT unioninput.* POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL src) key) 100)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL src) key) 100))))) unioninput)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR 'target/warehouse/union.out')) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME unioninput)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-6 depends on stages: Stage-1 , consists of Stage-3, Stage-2, Stage-4 @@ -33,61 +30,49 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:unioninput-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 100) - type: boolean + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 18 Data size: 3606 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 18 Data size: 3606 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 18 Data size: 3606 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - null-subquery2:unioninput-subquery2:src TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key > 100) - type: boolean + predicate: (key > 100) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 18 Data size: 3606 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 18 Data size: 3606 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 18 Data size: 3606 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -110,12 +95,10 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -123,12 +106,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union10.q.out ql/src/test/results/clientpositive/union10.q.out index 0f61d88..9342735 100644 --- ql/src/test/results/clientpositive/union10.q.out +++ ql/src/test/results/clientpositive/union10.q.out @@ -23,9 +23,6 @@ insert overwrite table tmptable UNION ALL select 'tst3' as key, count(1) as value from src s3) unionsrc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst2' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst3' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME tmptable))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-9, Stage-10 @@ -42,40 +39,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery1:unionsrc-subquery1-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst1' - type: string - expr: _col0 - type: bigint + expressions: 'tst1' (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -83,56 +73,47 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -163,12 +144,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -177,12 +156,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -197,40 +174,33 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery2:unionsrc-subquery1-subquery2:s2 + Map Operator Tree: TableScan alias: s2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst2' - type: string - expr: _col0 - type: bigint + expressions: 'tst2' (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -238,40 +208,33 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:unionsrc-subquery2:s3 + Map Operator Tree: TableScan alias: s3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst3' - type: string - expr: _col0 - type: bigint + expressions: 'tst3' (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/union11.q.out ql/src/test/results/clientpositive/union11.q.out index a7d50ba..f226f35 100644 --- ql/src/test/results/clientpositive/union11.q.out +++ ql/src/test/results/clientpositive/union11.q.out @@ -16,9 +16,6 @@ explain UNION ALL select 'tst3' as key, count(1) as value from src s3) unionsrc group by unionsrc.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst2' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst3' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL unionsrc) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3, Stage-4 @@ -29,40 +26,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery1:unionsrc-subquery1-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst1' - type: string - expr: _col0 - type: bigint + expressions: 'tst1' (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -70,114 +60,78 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint -#### A masked pattern was here #### + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) TableScan Union + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint -#### A masked pattern was here #### + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) TableScan Union + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -185,40 +139,33 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery2:unionsrc-subquery1-subquery2:s2 + Map Operator Tree: TableScan alias: s2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst2' - type: string - expr: _col0 - type: bigint + expressions: 'tst2' (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -226,40 +173,33 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:unionsrc-subquery2:s3 + Map Operator Tree: TableScan alias: s3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst3' - type: string - expr: _col0 - type: bigint + expressions: 'tst3' (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/union12.q.out ql/src/test/results/clientpositive/union12.q.out index 6b9d826..c5441c3 100644 --- ql/src/test/results/clientpositive/union12.q.out +++ ql/src/test/results/clientpositive/union12.q.out @@ -23,9 +23,6 @@ insert overwrite table tmptable UNION ALL select 'tst3' as key, count(1) as value from srcbucket s3) unionsrc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst2' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcbucket) s3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst3' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME tmptable))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-9, Stage-10 @@ -42,40 +39,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery1:unionsrc-subquery1-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst1' - type: string - expr: _col0 - type: bigint + expressions: 'tst1' (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -83,56 +73,47 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -163,12 +144,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -177,12 +156,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -197,40 +174,33 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery2:unionsrc-subquery1-subquery2:s2 + Map Operator Tree: TableScan alias: s2 + Statistics: Num rows: 0 Data size: 216 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 216 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst2' - type: string - expr: _col0 - type: bigint + expressions: 'tst2' (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -238,40 +208,33 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:unionsrc-subquery2:s3 + Map Operator Tree: TableScan alias: s3 + Statistics: Num rows: 0 Data size: 11603 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 11603 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst3' - type: string - expr: _col0 - type: bigint + expressions: 'tst3' (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/union13.q.out ql/src/test/results/clientpositive/union13.q.out index 1802778..bd9d4da 100644 --- ql/src/test/results/clientpositive/union13.q.out +++ ql/src/test/results/clientpositive/union13.q.out @@ -10,9 +10,6 @@ explain select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2) unionsrc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s1) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s1) value) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -20,53 +17,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:unionsrc-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - null-subquery2:unionsrc-subquery2:s2 TableScan alias: s2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union14.q.out ql/src/test/results/clientpositive/union14.q.out index c7123dd..a6d349b 100644 --- ql/src/test/results/clientpositive/union14.q.out +++ ql/src/test/results/clientpositive/union14.q.out @@ -14,9 +14,6 @@ explain select 'tst1' as key, cast(count(1) as string) as value from src s1) unionsrc group by unionsrc.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_FUNCTION count 1)) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL unionsrc) key)))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 @@ -25,40 +22,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:unionsrc-subquery2:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst1' - type: string - expr: UDFToString(_col0) - type: string + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -66,93 +56,65 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - TableScan - Union - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - null-subquery1:unionsrc-subquery1:s2 + Map Operator Tree: TableScan alias: s2 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 2 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + TableScan + Union + Statistics: Num rows: 2 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union15.q.out ql/src/test/results/clientpositive/union15.q.out index 936589e..88c9553 100644 --- ql/src/test/results/clientpositive/union15.q.out +++ ql/src/test/results/clientpositive/union15.q.out @@ -16,9 +16,6 @@ explain UNION ALL select s3.key as key, s3.value as value from src1 s3) unionsrc group by unionsrc.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_FUNCTION count 1)) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) value))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) s3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s3) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s3) value) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL unionsrc) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -27,40 +24,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery1:unionsrc-subquery1-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst1' - type: string - expr: UDFToString(_col0) - type: string + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -68,130 +58,90 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 3 Data size: 704 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 704 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - null-subquery1-subquery2:unionsrc-subquery1-subquery2:s2 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: bigint) TableScan alias: s2 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 3 Data size: 704 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 704 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - null-subquery2:unionsrc-subquery2:s3 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: bigint) TableScan alias: s3 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 3 Data size: 704 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 704 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union16.q.out ql/src/test/results/clientpositive/union16.q.out index a9a6e14..2bd8d5e 100644 --- ql/src/test/results/clientpositive/union16.q.out +++ ql/src/test/results/clientpositive/union16.q.out @@ -62,9 +62,6 @@ SELECT count(1) FROM ( SELECT key, value FROM src UNION ALL SELECT key, value FROM src) src POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -72,622 +69,520 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery1-subquery2:src-subquery1-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery2:src-subquery1-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery2:src-subquery2:src + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 725 Data size: 145300 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union17.q.out ql/src/test/results/clientpositive/union17.q.out index bfc73ed..3efc03c 100644 --- ql/src/test/results/clientpositive/union17.q.out +++ ql/src/test/results/clientpositive/union17.q.out @@ -26,9 +26,6 @@ FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key, unionsrc.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_FUNCTION count 1)) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL unionsrc) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL unionsrc) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL unionsrc) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL unionsrc) key) (. (TOK_TABLE_OR_COL unionsrc) value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -42,40 +39,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:unionsrc-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst1' - type: string - expr: UDFToString(_col0) - type: string + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -83,81 +73,54 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 30 Data size: 6084 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: substr(_col1, 5) - type: string + key expressions: substr(_col1, 5) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(_col1, 5) - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - null-subquery2:unionsrc-subquery2:s2 + Map-reduce partition columns: substr(_col1, 5) (type: string) + Statistics: Num rows: 30 Data size: 6084 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: s2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 30 Data size: 6084 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: substr(_col1, 5) - type: string + key expressions: substr(_col1, 5) (type: string) sort order: + - Map-reduce partition columns: - expr: substr(_col1, 5) - type: string - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: substr(_col1, 5) (type: string) + Statistics: Num rows: 30 Data size: 6084 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Forward + Statistics: Num rows: 30 Data size: 6084 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 2640 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0) - bucketGroup: false - keys: - expr: VALUE._col0 - type: string - expr: VALUE._col1 - type: string + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string), VALUE._col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 8160 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -165,41 +128,28 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 30 Data size: 2640 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 15 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -221,49 +171,28 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 30 Data size: 8160 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 4080 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 4200 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 15 Data size: 4200 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union18.q.out ql/src/test/results/clientpositive/union18.q.out index 412bb87..1933ac4 100644 --- ql/src/test/results/clientpositive/union18.q.out +++ ql/src/test/results/clientpositive/union18.q.out @@ -26,9 +26,6 @@ FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, unionsrc.value INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, unionsrc.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_FUNCTION count 1)) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -50,40 +47,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:unionsrc-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst1' - type: string - expr: UDFToString(_col0) - type: string + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -91,80 +81,62 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 30 Data size: 6084 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 8160 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 30 Data size: 8160 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 13680 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 30 Data size: 13680 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - null-subquery2:unionsrc-subquery2:s2 TableScan alias: s2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 30 Data size: 6084 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 8160 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 30 Data size: 8160 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 13680 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 30 Data size: 13680 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -195,12 +167,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -209,12 +179,10 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -251,12 +219,10 @@ STAGE PLANS: Stage: Stage-11 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -265,12 +231,10 @@ STAGE PLANS: Stage: Stage-13 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union19.q.out ql/src/test/results/clientpositive/union19.q.out index f2911e6..ff61868 100644 --- ql/src/test/results/clientpositive/union19.q.out +++ ql/src/test/results/clientpositive/union19.q.out @@ -26,9 +26,6 @@ FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, count(unionsrc.value) group by unionsrc.key INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, unionsrc.value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_FUNCTION count 1)) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL unionsrc) value)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL unionsrc) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value))))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -40,40 +37,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:unionsrc-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst1' - type: string - expr: UDFToString(_col0) - type: string + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -81,106 +71,70 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 30 Data size: 6084 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 6084 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: - expr: count(_col1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 2640 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 30 Data size: 2640 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: bigint) Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 13680 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 30 Data size: 13680 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - null-subquery2:unionsrc-subquery2:s2 TableScan alias: s2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 30 Data size: 6084 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 6084 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: - expr: count(_col1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 2640 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 30 Data size: 2640 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: bigint) Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 13680 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 30 Data size: 13680 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -188,24 +142,18 @@ STAGE PLANS: name: default.dest2 Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 15 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union2.q.out ql/src/test/results/clientpositive/union2.q.out index 5e58d6a..0fac9d9 100644 --- ql/src/test/results/clientpositive/union2.q.out +++ ql/src/test/results/clientpositive/union2.q.out @@ -10,9 +10,6 @@ explain select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2) unionsrc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s1) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s1) value) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -20,70 +17,60 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:unionsrc-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery2:unionsrc-subquery2:s2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: s2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union20.q.out ql/src/test/results/clientpositive/union20.q.out index 5548288..b6003bc 100644 --- ql/src/test/results/clientpositive/union20.q.out +++ ql/src/test/results/clientpositive/union20.q.out @@ -24,9 +24,6 @@ JOIN select s4.key as key, s4.value as value from src s4 where s4.key < 10) unionsrc2 ON (unionsrc1.key = unionsrc2.key) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_FUNCTION count 1)) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) value)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL s2) key) 10))))) unionsrc1) (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_FUNCTION count 1)) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s4)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s4) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s4) value) value)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL s4) key) 10))))) unionsrc2) (= (. (TOK_TABLE_OR_COL unionsrc1) key) (. (TOK_TABLE_OR_COL unionsrc2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc1) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc2) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc2) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-6 @@ -36,40 +33,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:unionsrc2-subquery1:s3 + Map Operator Tree: TableScan alias: s3 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst1' - type: string - expr: UDFToString(_col0) - type: string + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -77,99 +67,61 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - TableScan - Union - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 10 Data size: 2075 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - null-subquery2:unionsrc1-subquery2:s2 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 2075 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string) TableScan - alias: s2 + alias: s4 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 10 Data size: 2075 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - null-subquery2:unionsrc2-subquery2:s4 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 2075 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string) TableScan - alias: s4 + alias: s2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key < 10) - type: boolean + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1803 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 10 Data size: 2075 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 2075 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string) + TableScan + Union + Statistics: Num rows: 10 Data size: 2075 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 2075 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -177,22 +129,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 50 Data size: 27200 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 50 Data size: 27200 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 50 Data size: 27200 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -200,40 +145,33 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:unionsrc1-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst1' - type: string - expr: UDFToString(_col0) - type: string + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/union21.q.out ql/src/test/results/clientpositive/union21.q.out index efa939e..9a91d7a 100644 --- ql/src/test/results/clientpositive/union21.q.out +++ ql/src/test/results/clientpositive/union21.q.out @@ -32,9 +32,6 @@ FROM ( ) union_output GROUP BY key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR '1' key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION reverse (TOK_TABLE_OR_COL key)) key))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) key))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL astring) key))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_thrift))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR ([ (TOK_TABLE_OR_COL lstring) 0) key))))) union_output)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -42,202 +39,146 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery1-subquery1-subquery1:union_output-subquery1-subquery1-subquery1-subquery1:src + Map Operator Tree: TableScan - alias: src + alias: src_thrift + Statistics: Num rows: 16 Data size: 1606 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: '1' - type: string + expressions: astring (type: string) outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 1606 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 132 Data size: 20648 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 132 Data size: 20648 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 132 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - null-subquery1-subquery1-subquery1-subquery2:union_output-subquery1-subquery1-subquery1-subquery2:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 132 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col1 (type: bigint) TableScan - alias: src + alias: src_thrift + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: reverse(key) - type: string + expressions: lstring[0] (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 1606 Basic stats: PARTIAL Column stats: NONE Union + Statistics: Num rows: 132 Data size: 20648 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 132 Data size: 20648 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 132 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - null-subquery1-subquery1-subquery2:union_output-subquery1-subquery1-subquery2:src + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 132 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col1 (type: bigint) TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 132 Data size: 20648 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 132 Data size: 20648 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 132 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - null-subquery1-subquery2:union_output-subquery1-subquery2:src_thrift + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 132 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col1 (type: bigint) TableScan - alias: src_thrift + alias: src + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - expressions: - expr: astring - type: string + expressions: '1' (type: string) outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Union + Statistics: Num rows: 132 Data size: 20648 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 132 Data size: 20648 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 132 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - null-subquery2:union_output-subquery2:src_thrift + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 132 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col1 (type: bigint) TableScan - alias: src_thrift + alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: lstring[0] - type: string + expressions: reverse(key) (type: string) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 132 Data size: 20648 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 132 Data size: 20648 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 132 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 132 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 66 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 66 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 66 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union22.q.out ql/src/test/results/clientpositive/union22.q.out index 48a58f1..eb5ad5c 100644 --- ql/src/test/results/clientpositive/union22.q.out +++ ql/src/test/results/clientpositive/union22.q.out @@ -83,7 +83,144 @@ POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSc POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k5 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dst_union22_delta))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k1) k1) (TOK_SELEXPR (TOK_TABLE_OR_COL k2) k2) (TOK_SELEXPR (TOK_TABLE_OR_COL k3) k3) (TOK_SELEXPR (TOK_TABLE_OR_COL k4) k4)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (<= (TOK_TABLE_OR_COL k0) 50))))) (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME dst_union22) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dst_union22_delta))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (> (TOK_TABLE_OR_COL k0) 50))))) b) (and (= (. (TOK_TABLE_OR_COL a) k1) (. (TOK_TABLE_OR_COL b) k1)) (= (. (TOK_TABLE_OR_COL a) ds) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) k1) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) k2) k2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k3) k3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k4) k4)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL a) k1) 20))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dst_union22) (TOK_PARTSPEC (TOK_PARTVAL ds '2')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_UNION + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + dst_union22_delta + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + k1 + k1 + TOK_SELEXPR + TOK_TABLE_OR_COL + k2 + k2 + TOK_SELEXPR + TOK_TABLE_OR_COL + k3 + k3 + TOK_SELEXPR + TOK_TABLE_OR_COL + k4 + k4 + TOK_WHERE + and + = + TOK_TABLE_OR_COL + ds + '1' + <= + TOK_TABLE_OR_COL + k0 + 50 + TOK_QUERY + TOK_FROM + TOK_LEFTOUTERJOIN + TOK_TABREF + TOK_TABNAME + dst_union22 + a + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + dst_union22_delta + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + = + TOK_TABLE_OR_COL + ds + '1' + > + TOK_TABLE_OR_COL + k0 + 50 + b + and + = + . + TOK_TABLE_OR_COL + a + k1 + . + TOK_TABLE_OR_COL + b + k1 + = + . + TOK_TABLE_OR_COL + a + ds + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + k1 + k1 + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + k2 + k2 + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + k3 + k3 + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + k4 + k4 + TOK_WHERE + > + . + TOK_TABLE_OR_COL + a + k1 + 20 + subq + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dst_union22 + TOK_PARTSPEC + TOK_PARTVAL + ds + '2' + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + STAGE DEPENDENCIES: Stage-7 is a root stage , consists of Stage-8, Stage-1 @@ -152,17 +289,9 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((k0 > 50) and (k1 > 20)) - type: boolean + predicate: ((k0 > 50) and (k1 > 20)) (type: boolean) Select Operator - expressions: - expr: k1 - type: string - expr: k3 - type: string - expr: k4 - type: string + expressions: k1 (type: string), k3 (type: string), k4 (type: string) outputColumnNames: _col1, _col3, _col4 HashTable Sink Operator condition expressions: @@ -173,24 +302,20 @@ STAGE PLANS: filter predicates: 0 {(ds = '1')} 1 - handleSkewJoin: false keys: - 0 [Column[k1]] - 1 [Column[_col1]] + 0 k1 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:subq-subquery2:a + Map Operator Tree: TableScan alias: a GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (k1 > 20) - type: boolean + predicate: (k1 > 20) (type: boolean) Map Join Operator condition map: Left Outer Join0 to 1 @@ -202,22 +327,13 @@ STAGE PLANS: filter predicates: 0 {(ds = '1')} 1 - handleSkewJoin: false keys: - 0 [Column[k1]] - 1 [Column[_col1]] + 0 k1 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col10, _col11 Position of Big Table: 0 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col10 - type: string - expr: _col11 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false @@ -328,34 +444,22 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Union - Statistics: - numRows: 348 dataSize: 9684 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 348 dataSize: 9684 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Static Partition Specification: ds=2/ - Statistics: - numRows: 348 dataSize: 9684 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -376,56 +480,31 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - null-subquery1:subq-subquery1:dst_union22_delta TableScan alias: dst_union22_delta - Statistics: - numRows: 500 dataSize: 16936 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 16936 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (k0 <= 50) - type: boolean - Statistics: - numRows: 166 dataSize: 5622 basicStatsState: COMPLETE colStatsState: NONE + predicate: (k0 <= 50) (type: boolean) + Statistics: Num rows: 166 Data size: 5622 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: k1 - type: string - expr: k2 - type: string - expr: k3 - type: string - expr: k4 - type: string + expressions: k1 (type: string), k2 (type: string), k3 (type: string), k4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 166 dataSize: 5622 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 166 Data size: 5622 Basic stats: COMPLETE Column stats: NONE Union - Statistics: - numRows: 348 dataSize: 9684 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 348 dataSize: 9684 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Static Partition Specification: ds=2/ - Statistics: - numRows: 348 dataSize: 9684 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -544,78 +623,41 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:subq-subquery2:a - TableScan - alias: a - Statistics: - numRows: 500 dataSize: 11124 basicStatsState: COMPLETE colStatsState: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (k1 > 20) - type: boolean - Statistics: - numRows: 166 dataSize: 3693 basicStatsState: COMPLETE colStatsState: NONE - Reduce Output Operator - key expressions: - expr: k1 - type: string - sort order: + - Map-reduce partition columns: - expr: k1 - type: string - Statistics: - numRows: 166 dataSize: 3693 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: k1 - type: string - expr: k2 - type: string - expr: ds - type: string - null-subquery2:subq-subquery2:b:dst_union22_delta + Map Operator Tree: TableScan alias: dst_union22_delta - Statistics: - numRows: 500 dataSize: 16936 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 500 Data size: 16936 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: ((k0 > 50) and (k1 > 20)) - type: boolean - Statistics: - numRows: 55 dataSize: 1862 basicStatsState: COMPLETE colStatsState: NONE + predicate: ((k0 > 50) and (k1 > 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 1862 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: k1 - type: string - expr: k3 - type: string - expr: k4 - type: string + expressions: k1 (type: string), k3 (type: string), k4 (type: string) outputColumnNames: _col1, _col3, _col4 - Statistics: - numRows: 55 dataSize: 1862 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 55 Data size: 1862 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - Statistics: - numRows: 55 dataSize: 1862 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 55 Data size: 1862 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: - expr: _col3 - type: string - expr: _col4 - type: string + value expressions: _col3 (type: string), _col4 (type: string) + TableScan + alias: a + Statistics: Num rows: 500 Data size: 11124 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (k1 > 20) (type: boolean) + Statistics: Num rows: 166 Data size: 3693 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: k1 (type: string) + sort order: + + Map-reduce partition columns: k1 (type: string) + Statistics: Num rows: 166 Data size: 3693 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: k1 (type: string), k2 (type: string), ds (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -717,23 +759,12 @@ STAGE PLANS: filter predicates: 0 {(VALUE._col4 = '1')} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1, _col10, _col11 - Statistics: - numRows: 182 dataSize: 4062 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 182 Data size: 4062 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col10 - type: string - expr: _col11 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 182 dataSize: 4062 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 182 Data size: 4062 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 diff --git ql/src/test/results/clientpositive/union23.q.out ql/src/test/results/clientpositive/union23.q.out index 4b4d8e2..66ce233 100644 --- ql/src/test/results/clientpositive/union23.q.out +++ ql/src/test/results/clientpositive/union23.q.out @@ -16,9 +16,6 @@ from ( select key as key2, value as value2 from src) s order by s.key2, s.value2 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST key2 value2)))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) key2) (TOK_SELEXPR (TOK_TABLE_OR_COL value) value2))))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) value2))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL s) key2)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL s) value2))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -26,80 +23,56 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:s-subquery1:src + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - null-subquery2:s-subquery2:src - TableScan - alias: src - Select Operator - expressions: - expr: key - type: string - expr: value - type: string - outputColumnNames: _col0, _col1 - Union - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union24.q.out ql/src/test/results/clientpositive/union24.q.out index 2c41029..a88951a 100644 --- ql/src/test/results/clientpositive/union24.q.out +++ ql/src/test/results/clientpositive/union24.q.out @@ -51,7 +51,130 @@ select s.key, s.count from ( order by s.key ASC, s.count ASC POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL count))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL count))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src4))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL count))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src5))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) count)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) count))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL s) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL s) count))))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_UNION + TOK_UNION + TOK_UNION + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + count + TOK_WHERE + < + TOK_TABLE_OR_COL + key + 10 + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src3 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + count + TOK_WHERE + < + TOK_TABLE_OR_COL + key + 10 + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src4 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + count + TOK_WHERE + < + TOK_TABLE_OR_COL + key + 10 + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src5 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + count + TOK_WHERE + < + TOK_TABLE_OR_COL + key + 10 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + s + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + s + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + s + count + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + s + key + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + s + count + STAGE DEPENDENCIES: Stage-3 is a root stage @@ -61,52 +184,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:s-subquery2:src5 + Map Operator Tree: TableScan alias: src5 - Statistics: - numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 10) - type: boolean - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col1 - type: bigint + value expressions: _col1 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -157,25 +260,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 51 dataSize: 244 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 51 Data size: 244 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 51 dataSize: 244 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 51 Data size: 244 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -196,184 +289,93 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan - GatherStats: false - Union - Statistics: - numRows: 360 dataSize: 1726 basicStatsState: COMPLETE colStatsState: NONE - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1 - Statistics: - numRows: 360 dataSize: 1726 basicStatsState: COMPLETE colStatsState: NONE - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - sort order: ++ - Statistics: - numRows: 360 dataSize: 1726 basicStatsState: COMPLETE colStatsState: NONE - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - null-subquery1-subquery1-subquery1:s-subquery1-subquery1-subquery1:src2 - TableScan - alias: src2 - Statistics: - numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE + alias: src4 + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 10) - type: boolean - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: count - type: bigint + expressions: key (type: string), count (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Union - Statistics: - numRows: 360 dataSize: 1726 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 360 dataSize: 1726 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Statistics: - numRows: 360 dataSize: 1726 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - null-subquery1-subquery1-subquery2:s-subquery1-subquery1-subquery2:src3 + value expressions: _col0 (type: string), _col1 (type: bigint) + TableScan + GatherStats: false + Union + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan alias: src3 - Statistics: - numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 10) - type: boolean - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: count - type: bigint + expressions: key (type: string), count (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Union - Statistics: - numRows: 360 dataSize: 1726 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 360 dataSize: 1726 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Statistics: - numRows: 360 dataSize: 1726 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - null-subquery1-subquery2:s-subquery1-subquery2:src4 + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan - alias: src4 - Statistics: - numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE + alias: src2 + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 10) - type: boolean - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: count - type: bigint + expressions: key (type: string), count (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Union - Statistics: - numRows: 360 dataSize: 1726 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 360 dataSize: 1726 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Statistics: - numRows: 360 dataSize: 1726 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + value expressions: _col0 (type: string), _col1 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -531,15 +533,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 360 dataSize: 1726 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 360 dataSize: 1726 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -637,7 +637,126 @@ select s.key, s.count from ( order by s.key ASC, s.count ASC POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL count))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL count))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src4) a) (TOK_TABREF (TOK_TABNAME src5) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) count) count)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL a) key) 10))))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) count))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL s) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL s) count))))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_UNION + TOK_UNION + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + count + TOK_WHERE + < + TOK_TABLE_OR_COL + key + 10 + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src3 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + count + TOK_WHERE + < + TOK_TABLE_OR_COL + key + 10 + TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src4 + a + TOK_TABREF + TOK_TABNAME + src5 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + count + count + TOK_WHERE + < + . + TOK_TABLE_OR_COL + a + key + 10 + s + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + s + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + s + count + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + s + key + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + s + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -647,61 +766,37 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:s-subquery2:a + Map Operator Tree: TableScan - alias: a - Statistics: - numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE + alias: b + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 10) - type: boolean - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string - null-subquery2:s-subquery2:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: count (type: bigint) TableScan - alias: b - Statistics: - numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE + alias: a + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 10) - type: boolean - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 - value expressions: - expr: count - type: bigint + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -800,19 +895,12 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col5 - Statistics: - numRows: 113 dataSize: 543 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 113 Data size: 543 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: bigint + expressions: _col0 (type: string), _col5 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 113 dataSize: 543 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 113 Data size: 543 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -833,135 +921,69 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Union - Statistics: - numRows: 319 dataSize: 1531 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 319 dataSize: 1531 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Statistics: - numRows: 319 dataSize: 1531 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - null-subquery1-subquery1:s-subquery1-subquery1:src2 + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan alias: src2 - Statistics: - numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 10) - type: boolean - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: count - type: bigint + expressions: key (type: string), count (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Union - Statistics: - numRows: 319 dataSize: 1531 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 319 dataSize: 1531 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Statistics: - numRows: 319 dataSize: 1531 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - null-subquery1-subquery2:s-subquery1-subquery2:src3 + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan alias: src3 - Statistics: - numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 10) - type: boolean - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: count - type: bigint + expressions: key (type: string), count (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Union - Statistics: - numRows: 319 dataSize: 1531 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 319 dataSize: 1531 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Statistics: - numRows: 319 dataSize: 1531 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + value expressions: _col0 (type: string), _col1 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1076,15 +1098,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 319 dataSize: 1531 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 319 dataSize: 1531 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1172,7 +1192,130 @@ select s.key, s.count from ( order by s.key ASC, s.count ASC POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL count))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL count))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10))))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src4) a) (TOK_TABREF (TOK_TABNAME src5) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) count)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL a) key) 10)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) key))))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) count))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL s) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL s) count))))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_UNION + TOK_UNION + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + count + TOK_WHERE + < + TOK_TABLE_OR_COL + key + 10 + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src3 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + count + TOK_WHERE + < + TOK_TABLE_OR_COL + key + 10 + TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src4 + a + TOK_TABREF + TOK_TABNAME + src5 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + count + TOK_WHERE + < + . + TOK_TABLE_OR_COL + a + key + 10 + TOK_GROUPBY + . + TOK_TABLE_OR_COL + a + key + s + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + s + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + s + count + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + s + key + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + s + count + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1183,58 +1326,36 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:s-subquery2:a + Map Operator Tree: TableScan - alias: a - Statistics: - numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE + alias: b + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 10) - type: boolean - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE - tag: 0 - value expressions: - expr: key - type: string - null-subquery2:s-subquery2:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + tag: 1 TableScan - alias: b - Statistics: - numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE + alias: a + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 10) - type: boolean - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1333,28 +1454,18 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 - Statistics: - numRows: 113 dataSize: 543 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 113 Data size: 543 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: - numRows: 113 dataSize: 543 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 113 Data size: 543 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: - numRows: 113 dataSize: 543 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 113 Data size: 543 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -1375,24 +1486,16 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - Statistics: - numRows: 113 dataSize: 543 basicStatsState: COMPLETE colStatsState: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 113 Data size: 543 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col1 - type: bigint + value expressions: _col1 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1421,25 +1524,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: - numRows: 56 dataSize: 269 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 56 Data size: 269 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 56 dataSize: 269 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 56 Data size: 269 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -1460,135 +1553,69 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan GatherStats: false Union - Statistics: - numRows: 262 dataSize: 1257 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 262 dataSize: 1257 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Statistics: - numRows: 262 dataSize: 1257 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - null-subquery1-subquery1:s-subquery1-subquery1:src2 + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan alias: src2 - Statistics: - numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 10) - type: boolean - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: count - type: bigint + expressions: key (type: string), count (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Union - Statistics: - numRows: 262 dataSize: 1257 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 262 dataSize: 1257 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Statistics: - numRows: 262 dataSize: 1257 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - null-subquery1-subquery2:s-subquery1-subquery2:src3 + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan alias: src3 - Statistics: - numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 10) - type: boolean - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: count - type: bigint + expressions: key (type: string), count (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 103 dataSize: 494 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE Union - Statistics: - numRows: 262 dataSize: 1257 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: - numRows: 262 dataSize: 1257 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Statistics: - numRows: 262 dataSize: 1257 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + value expressions: _col0 (type: string), _col1 (type: bigint) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1703,15 +1730,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 262 dataSize: 1257 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 262 dataSize: 1257 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/union25.q.out ql/src/test/results/clientpositive/union25.q.out index d058da3..73635f9 100644 --- ql/src/test/results/clientpositive/union25.q.out +++ ql/src/test/results/clientpositive/union25.q.out @@ -55,9 +55,6 @@ FROM POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Lineage: tmp_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME tmp_unionall) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08') (= (. (TOK_TABLE_OR_COL a) hr) '11'))))) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp_srcpart) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08') (= (. (TOK_TABLE_OR_COL a) hr) '11'))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp_srcpart) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL b) ds) '2008-04-08') (= (. (TOK_TABLE_OR_COL b) hr) '11')))))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) master_table)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL key) value))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1) counts) (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -68,89 +65,65 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:a-subquery2:master_table-subquery1:t-subquery1:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - null-subquery2:a-subquery2:master_table-subquery2:t-subquery2:b + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE TableScan alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: _col0 - type: string + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: string + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col0 - type: string + expressions: _col0 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -158,113 +131,65 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 529 Data size: 11124 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 529 Data size: 11124 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 529 Data size: 11124 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint - null-subquery1:a-subquery1:a + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 529 Data size: 11124 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) TableScan alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 529 Data size: 11124 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 529 Data size: 11124 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 529 Data size: 11124 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 529 Data size: 11124 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 264 Data size: 5551 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: bigint - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col2 (type: bigint), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 264 Data size: 5551 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 264 Data size: 5551 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -281,12 +206,9 @@ STAGE PLANS: Create Table Operator: Create Table columns: counts bigint, key string, value string - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: tmp_unionall - isExternal: false Stage: Stage-3 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/union26.q.out ql/src/test/results/clientpositive/union26.q.out index 442819c..3fad78b 100644 --- ql/src/test/results/clientpositive/union26.q.out +++ ql/src/test/results/clientpositive/union26.q.out @@ -42,9 +42,6 @@ WHERE ds='2008-04-08' and hr='11' ) a group by key, value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (AND (and (and (and (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08') (= (. (TOK_TABLE_OR_COL a) hr) '11')) (= (. (TOK_TABLE_OR_COL b) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL b) hr) '12')) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))))) (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol (TOK_TABALIAS myTable))) (TOK_TABREF (TOK_TABNAME srcpart)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11')))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1) counts) (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 @@ -53,36 +50,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:a-subquery1:a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - null-subquery1:a-subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -90,18 +75,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -109,174 +90,106 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - TableScan - Union - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint - null-subquery2:a-subquery2:srcpart + Map Operator Tree: TableScan alias: srcpart + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 121 Data size: 18017 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 18017 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 121 Data size: 18017 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 121 Data size: 18017 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Select Operator - expressions: - expr: array(1,2,3) - type: array + expressions: array(1,2,3) (type: array) outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 121 Data size: 18017 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 18017 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - expr: _col1 - type: string + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 121 Data size: 18017 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 121 Data size: 18017 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + TableScan + Union + Statistics: Num rows: 121 Data size: 18017 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 18017 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 121 Data size: 18017 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 121 Data size: 18017 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 8934 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: bigint - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col2 (type: bigint), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 8934 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 60 Data size: 8934 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union28.q.out ql/src/test/results/clientpositive/union28.q.out index c9b39eb..c8b738f 100644 --- ql/src/test/results/clientpositive/union28.q.out +++ ql/src/test/results/clientpositive/union28.q.out @@ -29,9 +29,6 @@ select * from ( ) subq ) a POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME union_subq_union))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-9 is a root stage Stage-10 depends on stages: Stage-9, Stage-11 @@ -48,68 +45,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:a-subquery2-subquery1:subq-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + aggregations: count(1) + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -117,37 +85,29 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -155,51 +115,43 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - TableScan - Union - Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union - null-subquery1:a-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 57 Data size: 11422 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 57 Data size: 11422 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 57 Data size: 11422 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + TableScan + Union + Statistics: Num rows: 57 Data size: 11422 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 57 Data size: 11422 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 57 Data size: 11422 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union Stage: Stage-8 Conditional Operator @@ -225,12 +177,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -239,12 +189,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -259,68 +207,39 @@ STAGE PLANS: Stage: Stage-11 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:a-subquery2-subquery2:subq-subquery2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + aggregations: count(1) + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/union29.q.out ql/src/test/results/clientpositive/union29.q.out index 36c9348..47dfd84 100644 --- ql/src/test/results/clientpositive/union29.q.out +++ ql/src/test/results/clientpositive/union29.q.out @@ -29,9 +29,6 @@ select * from ( ) subq ) a POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME union_subq_union))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 @@ -45,96 +42,77 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:a-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - null-subquery2:a-subquery2-subquery1:subq-subquery1:src TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - null-subquery2:a-subquery2-subquery2:subq-subquery2:src TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -165,12 +143,10 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -179,12 +155,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union3.q.out ql/src/test/results/clientpositive/union3.q.out index 7523974..10a97d1 100644 --- ql/src/test/results/clientpositive/union3.q.out +++ ql/src/test/results/clientpositive/union3.q.out @@ -34,9 +34,6 @@ FROM ( FROM (SELECT * FROM src LIMIT 1) s2 ) a POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 1))) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 1 id)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL id)))) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 1))) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 2 id)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL id))))) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 1))) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 3 id))))) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 1))) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 4 id))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3, Stage-5, Stage-7 @@ -50,37 +47,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:a-subquery2:s2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: 4 - type: int + expressions: 4 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -88,63 +81,59 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 4 Data size: 800 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 4 Data size: 800 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 4 Data size: 800 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 4 Data size: 800 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int + expressions: _col0 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 4 Data size: 800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -152,37 +141,33 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery2:a-subquery1-subquery2:s2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: 3 - type: int + expressions: 3 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -190,37 +175,33 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery1-subquery1:a-subquery1-subquery1-subquery1:s1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: 1 - type: int + expressions: 1 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -228,26 +209,19 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -255,37 +229,33 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery1-subquery2:a-subquery1-subquery1-subquery2:s1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: 2 - type: int + expressions: 2 (type: int) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -293,26 +263,19 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: int + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col0 - type: int + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/union30.q.out ql/src/test/results/clientpositive/union30.q.out index dcc7560..054e6ae 100644 --- ql/src/test/results/clientpositive/union30.q.out +++ ql/src/test/results/clientpositive/union30.q.out @@ -43,9 +43,6 @@ union all select key, value from src ) aa POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) aa)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME union_subq_union))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-11 @@ -63,68 +60,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:aa-subquery1-subquery2:a-subquery2-subquery1:subq-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + aggregations: count(1) + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -132,37 +100,29 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 5610 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -170,92 +130,74 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - TableScan - Union - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - null-subquery1:aa-subquery1-subquery1:a-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 57 Data size: 11422 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 57 Data size: 11422 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TableScan + Union + Statistics: Num rows: 57 Data size: 11422 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 57 Data size: 11422 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 86 Data size: 17234 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 86 Data size: 17234 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 86 Data size: 17234 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - null-subquery2:aa-subquery2:src TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 86 Data size: 17234 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 86 Data size: 17234 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 86 Data size: 17234 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -286,12 +228,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -300,12 +240,10 @@ STAGE PLANS: Stage: Stage-8 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -320,68 +258,39 @@ STAGE PLANS: Stage: Stage-11 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:aa-subquery1-subquery2:a-subquery2-subquery2:subq-subquery2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - expr: value - type: string + aggregations: count(1) + keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: string - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - expr: KEY._col1 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: bigint + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/union31.q.out ql/src/test/results/clientpositive/union31.q.out index 6d58e2a..8ac096e 100644 --- ql/src/test/results/clientpositive/union31.q.out +++ ql/src/test/results/clientpositive/union31.q.out @@ -52,9 +52,6 @@ insert overwrite table t3 insert overwrite table t4 select value, count(1) group by value POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME t3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME t4))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL value)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 @@ -66,143 +63,103 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:x-subquery1:t1 + Map Operator Tree: TableScan - alias: t1 + alias: t2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col1 + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col1 - type: string + aggregations: count(1) + keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - null-subquery2:x-subquery2:t2 TableScan - alias: t2 + alias: t1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Select Operator - expressions: - expr: _col1 - type: string + expressions: _col1 (type: string) outputColumnNames: _col1 + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col1 - type: string + aggregations: count(1) + keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -224,41 +181,28 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -392,9 +336,6 @@ POSTHOOK: Lineage: t3.cnt EXPRESSION [(t2)t2.null, (t1)t1.null, ] POSTHOOK: Lineage: t3.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t4.cnt EXPRESSION [(t2)t2.null, (t1)t1.null, ] POSTHOOK: Lineage: t4.value EXPRESSION [(t2)t2.FieldSchema(name:value, type:string, comment:null), (t1)t1.FieldSchema(name:value, type:string, comment:null), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) c1) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) c1) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME t5))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL c1)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL cnt)))) (TOK_GROUPBY (TOK_TABLE_OR_COL c1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME t6))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL c1)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL cnt)))) (TOK_GROUPBY (TOK_TABLE_OR_COL c1)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2, Stage-6 @@ -407,56 +348,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:x-subquery2:t2 + Map Operator Tree: TableScan alias: t2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -464,96 +388,67 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint -#### A masked pattern was here #### + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan Union + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Forward + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t5 Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -588,56 +483,39 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:x-subquery1:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -845,9 +723,6 @@ POSTHOOK: Lineage: t5.c1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, c POSTHOOK: Lineage: t5.cnt EXPRESSION [(t2)t2.null, (t1)t1.null, ] POSTHOOK: Lineage: t6.c1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t6.cnt EXPRESSION [(t2)t2.null, (t1)t1.null, ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) c1) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) c1) (TOK_SELEXPR (TOK_TABLE_OR_COL cnt)))))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME t7))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL c1)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL c1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME t8))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL c1)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL c1)))) - STAGE DEPENDENCIES: Stage-6 is a root stage Stage-3 depends on stages: Stage-6 @@ -859,56 +734,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:x-subquery1:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -916,100 +774,73 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - TableScan - Union - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: 1 - type: int - null-subquery2:x-subquery2:t2 + Map Operator Tree: TableScan alias: t2 + Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: cnt - type: bigint + expressions: key (type: string), cnt (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: 1 - type: int + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE + value expressions: 1 (type: int) + TableScan + Union + Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE + value expressions: 1 (type: int) Reduce Operator Tree: Forward + Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(1) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t7 Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(1) + keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union32.q.out ql/src/test/results/clientpositive/union32.q.out index 8cfdcc1..0a6f3a9 100644 --- ql/src/test/results/clientpositive/union32.q.out +++ ql/src/test/results/clientpositive/union32.q.out @@ -34,9 +34,6 @@ UNION ALL SELECT CAST(key AS BIGINT) AS key FROM t2) a ORDER BY key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL key)) key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_BIGINT (TOK_TABLE_OR_COL key)) key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -44,58 +41,49 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:a-subquery1:t1 + Map Operator Tree: TableScan alias: t1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToDouble(key) - type: double + expressions: UDFToDouble(key) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double + expressions: _col0 (type: double) outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: double - null-subquery2:a-subquery2:t2 + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) TableScan alias: t2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToDouble(UDFToLong(key)) - type: double + expressions: UDFToDouble(UDFToLong(key)) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double + expressions: _col0 (type: double) outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: double + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) Reduce Operator Tree: Extract + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -159,9 +147,6 @@ UNION ALL SELECT CAST(key AS DOUBLE) AS key FROM t2) a ORDER BY key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_TABREF (TOK_TABNAME t2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_BIGINT (. (TOK_TABLE_OR_COL a) key)) key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL key)) key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 @@ -170,34 +155,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:a-subquery1:a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - null-subquery1:a-subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -205,16 +180,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToDouble(UDFToLong(_col0)) - type: double + expressions: UDFToDouble(UDFToLong(_col0)) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -222,52 +195,43 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - TableScan - Union - Select Operator - expressions: - expr: _col0 - type: double - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 - type: double - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: double - null-subquery2:a-subquery2:t2 + Map Operator Tree: TableScan alias: t2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToDouble(key) - type: double + expressions: UDFToDouble(key) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double + expressions: _col0 (type: double) outputColumnNames: _col0 + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: double + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + TableScan + Union + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) Reduce Operator Tree: Extract + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -343,9 +307,6 @@ UNION ALL SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a ORDER BY key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL key)) key)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_TABREF (TOK_TABNAME t2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_BIGINT (. (TOK_TABLE_OR_COL a) key)) key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -354,34 +315,24 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:a-subquery2:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - null-subquery2:a-subquery2:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -389,16 +340,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 - handleSkewJoin: false outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToDouble(UDFToLong(_col0)) - type: double + expressions: UDFToDouble(UDFToLong(_col0)) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -406,52 +355,43 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double + expressions: _col0 (type: double) outputColumnNames: _col0 + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: double - null-subquery1:a-subquery1:t2 + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) TableScan alias: t2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToDouble(key) - type: double + expressions: UDFToDouble(key) (type: double) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double + expressions: _col0 (type: double) outputColumnNames: _col0 + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: double + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) Reduce Operator Tree: Extract + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -527,9 +467,6 @@ UNION ALL SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a ORDER BY key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_TABREF (TOK_TABNAME t2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_BIGINT (. (TOK_TABLE_OR_COL a) key)) key) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (. (TOK_TABLE_OR_COL b) key)) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL key)) key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_TABLE_OR_COL key)) value))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 @@ -538,37 +475,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:a-subquery1:a + Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - null-subquery1:a-subquery1:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan - alias: b + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -576,18 +501,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col4 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToDouble(UDFToLong(_col0)) - type: double - expr: UDFToString(UDFToDouble(_col4)) - type: string + expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToString(UDFToDouble(_col4)) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -595,62 +516,43 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - TableScan - Union - Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: double - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: double - expr: _col1 - type: string - null-subquery2:a-subquery2:t2 + Map Operator Tree: TableScan alias: t2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToDouble(key) - type: double - expr: key - type: string + expressions: UDFToDouble(key) (type: double), key (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: string + expressions: _col0 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: double - expr: _col1 - type: string + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: string) + TableScan + Union + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -726,9 +628,6 @@ UNION ALL SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a ORDER BY key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL key)) key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_TABLE_OR_COL key)) value)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_TABREF (TOK_TABNAME t2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_BIGINT (. (TOK_TABLE_OR_COL a) key)) key) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (. (TOK_TABLE_OR_COL b) key)) value))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -737,37 +636,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:a-subquery2:a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - null-subquery2:a-subquery2:b + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -775,18 +662,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col4 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToDouble(UDFToLong(_col0)) - type: double - expr: UDFToDouble(_col4) - type: double + expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToDouble(_col4) (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -794,62 +677,43 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: double + expressions: _col0 (type: double), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: double - expr: _col1 - type: double - null-subquery1:a-subquery1:t2 + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: double) TableScan alias: t2 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToDouble(key) - type: double - expr: UDFToDouble(key) - type: double + expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: double + expressions: _col0 (type: double), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: double - expr: _col1 - type: double + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: double) Reduce Operator Tree: Extract + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union33.q.out ql/src/test/results/clientpositive/union33.q.out index 3e9ddbe..09bba51 100644 --- ql/src/test/results/clientpositive/union33.q.out +++ ql/src/test/results/clientpositive/union33.q.out @@ -27,9 +27,6 @@ UNION ALL GROUP BY key )a POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 0)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT) value)) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_src))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-9 is a root stage Stage-10 depends on stages: Stage-9 @@ -45,49 +42,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:a-subquery2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -95,41 +78,27 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToString(_col1) - type: string + expressions: _col0 (type: string), UDFToString(_col1) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -137,55 +106,46 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - TableScan - Union - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_src - null-subquery1:a-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 0) - type: boolean + predicate: (key = 0) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 43 Data size: 5711 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 43 Data size: 5711 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 43 Data size: 5711 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + TableScan + Union + Statistics: Num rows: 43 Data size: 5711 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 43 Data size: 5711 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 43 Data size: 5711 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_src Stage: Stage-8 Conditional Operator @@ -211,12 +171,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -225,12 +183,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -298,9 +254,6 @@ UNION ALL POSTHOOK: type: QUERY POSTHOOK: Lineage: test_src.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_src.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT) value)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 0))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_src))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -316,49 +269,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:a-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: key - type: string + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -366,41 +305,27 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToString(_col1) - type: string + expressions: _col0 (type: string), UDFToString(_col1) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -408,50 +333,41 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 43 Data size: 5711 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 43 Data size: 5711 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 43 Data size: 5711 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src - null-subquery2:a-subquery2:src TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 0) - type: boolean + predicate: (key = 0) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 43 Data size: 5711 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 43 Data size: 5711 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 43 Data size: 5711 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -482,12 +398,10 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -496,12 +410,10 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union34.q.out ql/src/test/results/clientpositive/union34.q.out index 58fd2ad..cc7ad8c 100644 --- ql/src/test/results/clientpositive/union34.q.out +++ ql/src/test/results/clientpositive/union34.q.out @@ -74,9 +74,6 @@ POSTHOOK: Lineage: src10_3.key SIMPLE [(src)src.FieldSchema(name:key, type:strin POSTHOOK: Lineage: src10_3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src10_4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src10_4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src10_1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) sub1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src10_2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) sub0) (= (. (TOK_TABLE_OR_COL sub0) key) (. (TOK_TABLE_OR_COL sub1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL sub1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL sub1) value))))) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src10_3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) sub2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src10_4))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) alias0)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) alias1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-7 is a root stage Stage-2 depends on stages: Stage-7 @@ -93,153 +90,111 @@ STAGE PLANS: null-subquery1:alias1-subquery1:sub1:src10_1 TableScan alias: src10_1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 {_col0} {_col1} 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 1 + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:alias1-subquery1:sub0:src10_2 + Map Operator Tree: TableScan - alias: src10_2 + alias: src10_3 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - outputColumnNames: _col0 - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - outputColumnNames: _col0, _col1 - Position of Big Table: 1 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - null-subquery2:alias1-subquery2-subquery1:alias0-subquery1:sub2:src10_3 + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan - alias: src10_3 + alias: src10_4 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - null-subquery2:alias1-subquery2-subquery2:alias0-subquery2:src10_4 + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan - alias: src10_4 + alias: src10_2 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - outputColumnNames: _col0, _col1 - Union + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Local Work: Map Reduce Local Work Reduce Operator Tree: Extract + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -339,9 +294,6 @@ POSTHOOK: Lineage: src10_3.key SIMPLE [(src)src.FieldSchema(name:key, type:strin POSTHOOK: Lineage: src10_3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src10_4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src10_4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src10_1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) sub1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src10_2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) sub0) (= (. (TOK_TABLE_OR_COL sub0) key) (. (TOK_TABLE_OR_COL sub1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL sub1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL sub1) value))))) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src10_3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) sub2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src10_4))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) alias0)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) alias1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -350,48 +302,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:alias1-subquery1:sub0:src10_2 + Map Operator Tree: TableScan alias: src10_2 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - null-subquery1:alias1-subquery1:sub1:src10_1 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE TableScan alias: src10_1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -399,18 +335,14 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 - handleSkewJoin: false outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -418,107 +350,73 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - null-subquery2:alias1-subquery2-subquery1:alias0-subquery1:sub2:src10_3 + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src10_3 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - null-subquery2:alias1-subquery2-subquery2:alias0-subquery2:src10_4 + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: src10_4 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 31 Data size: 322 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union4.q.out ql/src/test/results/clientpositive/union4.q.out index 465a00a..5b174a0 100644 --- ql/src/test/results/clientpositive/union4.q.out +++ ql/src/test/results/clientpositive/union4.q.out @@ -21,9 +21,6 @@ insert overwrite table tmptable UNION ALL select 'tst2' as key, count(1) as value from src s2) unionsrc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst2' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME tmptable))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-9 @@ -39,40 +36,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:unionsrc-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst1' - type: string - expr: _col0 - type: bigint + expressions: 'tst1' (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -80,38 +70,32 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -142,12 +126,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -156,12 +138,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -176,40 +156,33 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:unionsrc-subquery2:s2 + Map Operator Tree: TableScan alias: s2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst2' - type: string - expr: _col0 - type: bigint + expressions: 'tst2' (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/union5.q.out ql/src/test/results/clientpositive/union5.q.out index 25fc927..0087393 100644 --- ql/src/test/results/clientpositive/union5.q.out +++ ql/src/test/results/clientpositive/union5.q.out @@ -12,9 +12,6 @@ explain UNION ALL select 'tst2' as key, count(1) as value from src s2) unionsrc group by unionsrc.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst2' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL unionsrc) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3 @@ -24,40 +21,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:unionsrc-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst1' - type: string - expr: _col0 - type: bigint + expressions: 'tst1' (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -65,85 +55,59 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint -#### A masked pattern was here #### + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) TableScan Union + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -151,40 +115,33 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:unionsrc-subquery2:s2 + Map Operator Tree: TableScan alias: s2 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst2' - type: string - expr: _col0 - type: bigint + expressions: 'tst2' (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/union6.q.out ql/src/test/results/clientpositive/union6.q.out index 992648e..4f35a6b 100644 --- ql/src/test/results/clientpositive/union6.q.out +++ ql/src/test/results/clientpositive/union6.q.out @@ -21,9 +21,6 @@ insert overwrite table tmptable UNION ALL select s2.key as key, s2.value as value from src1 s2) unionsrc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_FUNCTION count 1)) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME tmptable))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -38,40 +35,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:unionsrc-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst1' - type: string - expr: UDFToString(_col0) - type: string + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -79,46 +69,38 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 2 Data size: 488 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - null-subquery2:unionsrc-subquery2:s2 TableScan alias: s2 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 2 Data size: 488 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -149,12 +131,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -163,12 +143,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union7.q.out ql/src/test/results/clientpositive/union7.q.out index cbacc9c..3a2d88c 100644 --- ql/src/test/results/clientpositive/union7.q.out +++ ql/src/test/results/clientpositive/union7.q.out @@ -12,9 +12,6 @@ explain UNION ALL select s2.key as key, s2.value as value from src1 s2) unionsrc group by unionsrc.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_FUNCTION count 1)) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL unionsrc) key)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -23,40 +20,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:unionsrc-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: 'tst1' - type: string - expr: UDFToString(_col0) - type: string + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -64,93 +54,65 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 2 Data size: 488 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 488 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - null-subquery2:unionsrc-subquery2:s2 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: bigint) TableScan alias: s2 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 2 Data size: 488 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string + expressions: _col0 (type: string) outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 488 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string + aggregations: count(1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union8.q.out ql/src/test/results/clientpositive/union8.q.out index 4b0dec3..1d5193a 100644 --- ql/src/test/results/clientpositive/union8.q.out +++ ql/src/test/results/clientpositive/union8.q.out @@ -12,9 +12,6 @@ explain select s2.key as key, s2.value as value from src s2 UNION ALL select s3.key as key, s3.value as value from src s3) unionsrc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s1) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s1) value) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) value))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s3) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s3) value) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -22,78 +19,63 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery1:unionsrc-subquery1-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - null-subquery1-subquery2:unionsrc-subquery1-subquery2:s2 TableScan alias: s2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - null-subquery2:unionsrc-subquery2:s3 TableScan alias: s3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union9.q.out ql/src/test/results/clientpositive/union9.q.out index bf596c7..c6cc511 100644 --- ql/src/test/results/clientpositive/union9.q.out +++ ql/src/test/results/clientpositive/union9.q.out @@ -12,9 +12,6 @@ explain select s2.key as key, s2.value as value from src s2 UNION ALL select s3.key as key, s3.value as value from src s3) unionsrc POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s1) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s1) value) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) value))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s3) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s3) value) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -22,94 +19,80 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery1:unionsrc-subquery1-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery1-subquery2:unionsrc-subquery1-subquery2:s2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: s2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - null-subquery2:unionsrc-subquery2:s3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: s3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union_lateralview.q.out ql/src/test/results/clientpositive/union_lateralview.q.out index 0497cfa..85a4778 100644 --- ql/src/test/results/clientpositive/union_lateralview.q.out +++ ql/src/test/results/clientpositive/union_lateralview.q.out @@ -41,9 +41,6 @@ FROM ( LEFT OUTER JOIN src b ON d.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION EXPLODE (TOK_TABLE_OR_COL arr)) arr_ele (TOK_TABALIAS c))) (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION array 1 2 3) arr)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION array 1 2 3) arr)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '12')))))) a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) arr_ele) arr_ele) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)))) d) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL d) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_union_lateral_view))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL d) arr_ele)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL d) value))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -52,178 +49,112 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - d-subquery1:a-subquery1:src + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: array(1,2,3) - type: array + expressions: key (type: string), value (type: string), array(1,2,3) (type: array) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col3 - type: int - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col3 (type: int), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col2 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) Select Operator - expressions: - expr: _col2 - type: array + expressions: _col2 (type: array) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col3 - type: int - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col3 (type: int), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col2 - type: string - d-subquery2:a-subquery2:srcpart + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) TableScan alias: srcpart + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: array(1,2,3) - type: array + expressions: key (type: string), value (type: string), array(1,2,3) (type: array) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Lateral View Forward + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col3 - type: int - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col3 (type: int), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col2 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) Select Operator - expressions: - expr: _col2 - type: array + expressions: _col2 (type: array) outputColumnNames: _col0 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE UDTF Operator + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col3 - type: int - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col3 (type: int), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col1 - type: string + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col2 - type: string + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -231,20 +162,15 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} {VALUE._col2} 1 {VALUE._col0} - handleSkewJoin: false outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 127 Data size: 25572 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToInteger(_col3) - type: int - expr: _col0 - type: int - expr: _col2 - type: string + expressions: UDFToInteger(_col3) (type: int), _col0 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 127 Data size: 25572 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 127 Data size: 25572 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/union_ppr.q.out ql/src/test/results/clientpositive/union_ppr.q.out index 352fa4b..f639960 100644 --- ql/src/test/results/clientpositive/union_ppr.q.out +++ ql/src/test/results/clientpositive/union_ppr.q.out @@ -17,7 +17,92 @@ WHERE A.ds = '2008-04-08' SORT BY A.key, A.value, A.ds, A.hr POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRCPART) X)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME X)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL X) key) 100)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRCPART) Y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME Y)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL Y) key) 100))))) A)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL A) ds) '2008-04-08')) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL A) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL A) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL A) ds)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL A) hr))))) + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_UNION + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + SRCPART + X + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + X + TOK_WHERE + < + . + TOK_TABLE_OR_COL + X + key + 100 + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + SRCPART + Y + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + Y + TOK_WHERE + < + . + TOK_TABLE_OR_COL + Y + key + 100 + A + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + . + TOK_TABLE_OR_COL + A + ds + '2008-04-08' + TOK_SORTBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + A + key + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + A + value + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + A + ds + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + A + hr + STAGE DEPENDENCIES: Stage-1 is a root stage @@ -26,137 +111,55 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:a-subquery1:x + Map Operator Tree: TableScan alias: x - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 100) - type: boolean - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Union - Statistics: - numRows: 38 dataSize: 7614 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7614 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 38 dataSize: 7614 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7614 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: ++++ - Statistics: - numRows: 38 dataSize: 7614 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7614 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - null-subquery2:a-subquery2:y + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: y - Statistics: - numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: - expr: (key < 100) - type: boolean - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: value - type: string - expr: ds - type: string - expr: hr - type: string + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 19 dataSize: 3807 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Union - Statistics: - numRows: 38 dataSize: 7614 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7614 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: - numRows: 38 dataSize: 7614 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7614 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: ++++ - Statistics: - numRows: 38 dataSize: 7614 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7614 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -250,15 +253,13 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: - numRows: 38 dataSize: 7614 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7614 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: - numRows: 38 dataSize: 7614 basicStatsState: COMPLETE colStatsState: NONE + Statistics: Num rows: 38 Data size: 7614 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/union_remove_6.q.out ql/src/test/results/clientpositive/union_remove_6.q.out index d2ce74f..2e2d919 100644 --- ql/src/test/results/clientpositive/union_remove_6.q.out +++ ql/src/test/results/clientpositive/union_remove_6.q.out @@ -51,9 +51,6 @@ FROM ( insert overwrite table outputTbl1 select * insert overwrite table outputTbl2 select * POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2, Stage-4 @@ -64,56 +61,39 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:a-subquery2:inputtbl1 + Map Operator Tree: TableScan alias: inputtbl1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -121,68 +101,56 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -211,56 +179,39 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:a-subquery1:inputtbl1 + Map Operator Tree: TableScan alias: inputtbl1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: - expr: key - type: string + expressions: key (type: string) outputColumnNames: key + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string + aggregations: count(1) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: string + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/union_top_level.q.out ql/src/test/results/clientpositive/union_top_level.q.out index 1cc2979..0e2d67a 100644 --- ql/src/test/results/clientpositive/union_top_level.q.out +++ ql/src/test/results/clientpositive/union_top_level.q.out @@ -14,9 +14,6 @@ select key, 1 as value from src where key % 3 == 1 limit 3 union all select key, 2 as value from src where key % 3 == 2 limit 3 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 0 value)) (TOK_WHERE (== (% (TOK_TABLE_OR_COL key) 3) 0)) (TOK_LIMIT 3))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 value)) (TOK_WHERE (== (% (TOK_TABLE_OR_COL key) 3) 1)) (TOK_LIMIT 3)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 2 value)) (TOK_WHERE (== (% (TOK_TABLE_OR_COL key) 3) 2)) (TOK_LIMIT 3)))) _u1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3, Stage-4 @@ -27,36 +24,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery1:_u1-subquery1-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key % 3) = 0) - type: boolean + predicate: ((key % 3) = 0) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: 0 - type: int + expressions: key (type: string), 0 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -64,54 +57,45 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -119,36 +103,32 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery2:_u1-subquery1-subquery2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key % 3) = 1) - type: boolean + predicate: ((key % 3) = 1) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: 1 - type: int + expressions: key (type: string), 1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -156,36 +136,32 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:_u1-subquery2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key % 3) = 2) - type: boolean + predicate: ((key % 3) = 2) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: 2 - type: int + expressions: key (type: string), 2 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -230,9 +206,6 @@ select s1.key as k, s2.value as v from src s1 join src s2 on (s1.key = s2.key) l union all select s1.key as k, s2.value as v from src s1 join src s2 on (s1.key = s2.key) limit 10 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) s1) (TOK_TABREF (TOK_TABNAME src) s2) (= (. (TOK_TABLE_OR_COL s1) key) (. (TOK_TABLE_OR_COL s2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s1) key) k) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) v)) (TOK_LIMIT 10))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) s1) (TOK_TABREF (TOK_TABNAME src) s2) (= (. (TOK_TABLE_OR_COL s1) key) (. (TOK_TABLE_OR_COL s2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s1) key) k) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) v)) (TOK_LIMIT 10)))) _u1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -244,37 +217,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1:_u1-subquery1:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - null-subquery1:_u1-subquery1:s2 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: s2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -282,19 +243,17 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col5 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1010 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -302,23 +261,20 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 10 Data size: 1010 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 1010 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1010 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -326,37 +282,31 @@ STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 20 Data size: 2020 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 2020 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 2020 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 20 Data size: 2020 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 2020 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 2020 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -364,37 +314,25 @@ STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:_u1-subquery2:s1 + Map Operator Tree: TableScan alias: s1 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - null-subquery2:_u1-subquery2:s2 + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) TableScan alias: s2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: key - type: string + key expressions: key (type: string) sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: value - type: string + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -402,19 +340,17 @@ STAGE PLANS: condition expressions: 0 {VALUE._col0} 1 {VALUE._col1} - handleSkewJoin: false outputColumnNames: _col0, _col5 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string + expressions: _col0 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1010 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -422,23 +358,20 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + Statistics: Num rows: 10 Data size: 1010 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 10 Data size: 1010 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1010 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -498,9 +431,6 @@ select key, 1 as value from src where key % 3 == 1 limit 3 union all select key, 2 as value from src where key % 3 == 2 limit 3 POSTHOOK: type: CREATETABLE_AS_SELECT -ABSTRACT SYNTAX TREE: - (TOK_CREATETABLE (TOK_TABNAME union_top) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 0 value)) (TOK_WHERE (== (% (TOK_TABLE_OR_COL key) 3) 0)) (TOK_LIMIT 3))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 value)) (TOK_WHERE (== (% (TOK_TABLE_OR_COL key) 3) 1)) (TOK_LIMIT 3)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 2 value)) (TOK_WHERE (== (% (TOK_TABLE_OR_COL key) 3) 2)) (TOK_LIMIT 3)))) _u1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-9, Stage-10 @@ -518,36 +448,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery1:_u1-subquery1-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key % 3) = 0) - type: boolean + predicate: ((key % 3) = 0) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: 0 - type: int + expressions: key (type: string), 0 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -555,56 +481,47 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -630,24 +547,19 @@ STAGE PLANS: Create Table Operator: Create Table columns: key string, value int - if not exists: false input format: org.apache.hadoop.mapred.TextInputFormat - # buckets: -1 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: union_top - isExternal: false Stage: Stage-3 Stats-Aggr Operator Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -656,12 +568,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -676,36 +586,32 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery2:_u1-subquery1-subquery2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key % 3) = 1) - type: boolean + predicate: ((key % 3) = 1) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: 1 - type: int + expressions: key (type: string), 1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -713,36 +619,32 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:_u1-subquery2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key % 3) = 2) - type: boolean + predicate: ((key % 3) = 2) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: 2 - type: int + expressions: key (type: string), 2 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -806,9 +708,6 @@ select key, 1 as value from src where key % 3 == 1 limit 3 union all select key, 2 as value from src where key % 3 == 2 limit 3 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 0 value)) (TOK_WHERE (== (% (TOK_TABLE_OR_COL key) 3) 0)) (TOK_LIMIT 3))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 value)) (TOK_WHERE (== (% (TOK_TABLE_OR_COL key) 3) 1)) (TOK_LIMIT 3)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 2 value)) (TOK_WHERE (== (% (TOK_TABLE_OR_COL key) 3) 2)) (TOK_LIMIT 3)))) _u1)) (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME union_top))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-9, Stage-10 @@ -825,36 +724,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery1:_u1-subquery1-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key % 3) = 0) - type: boolean + predicate: ((key % 3) = 0) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: 0 - type: int + expressions: key (type: string), 0 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -862,56 +757,47 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -942,12 +828,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -956,12 +840,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -976,36 +858,32 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery2:_u1-subquery1-subquery2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key % 3) = 1) - type: boolean + predicate: ((key % 3) = 1) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: 1 - type: int + expressions: key (type: string), 1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1013,36 +891,32 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:_u1-subquery2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key % 3) = 2) - type: boolean + predicate: ((key % 3) = 2) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: 2 - type: int + expressions: key (type: string), 2 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1105,9 +979,6 @@ select key, 2 as value from src where key % 3 == 2 limit 3 POSTHOOK: type: QUERY POSTHOOK: Lineage: union_top.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: union_top.value EXPRESSION [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 0 value)) (TOK_WHERE (== (% (TOK_TABLE_OR_COL key) 3) 0)) (TOK_LIMIT 3))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 value)) (TOK_WHERE (== (% (TOK_TABLE_OR_COL key) 3) 1)) (TOK_LIMIT 3)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 2 value)) (TOK_WHERE (== (% (TOK_TABLE_OR_COL key) 3) 2)) (TOK_LIMIT 3)))) _u1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME union_top))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-9, Stage-10 @@ -1124,36 +995,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery1:_u1-subquery1-subquery1:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key % 3) = 0) - type: boolean + predicate: ((key % 3) = 0) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: 0 - type: int + expressions: key (type: string), 0 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1161,56 +1028,47 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Union + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top -#### A masked pattern was here #### TableScan Union + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: int + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + Statistics: Num rows: 9 Data size: 900 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1241,12 +1099,10 @@ STAGE PLANS: Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1255,12 +1111,10 @@ STAGE PLANS: Stage: Stage-6 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1275,36 +1129,32 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - null-subquery1-subquery2:_u1-subquery1-subquery2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key % 3) = 1) - type: boolean + predicate: ((key % 3) = 1) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: 1 - type: int + expressions: key (type: string), 1 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1312,36 +1162,32 @@ STAGE PLANS: Stage: Stage-10 Map Reduce - Alias -> Map Operator Tree: - null-subquery2:_u1-subquery2:src + Map Operator Tree: TableScan alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key % 3) = 2) - type: boolean + predicate: ((key % 3) = 2) (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: string - expr: 2 - type: int + expressions: key (type: string), 2 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: int + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) Reduce Operator Tree: Extract + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1412,9 +1258,6 @@ POSTHOOK: Lineage: union_top.key EXPRESSION [(src)src.FieldSchema(name:key, type POSTHOOK: Lineage: union_top.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: union_top.value EXPRESSION [] POSTHOOK: Lineage: union_top.value EXPRESSION [] -ABSTRACT SYNTAX TREE: - (TOK_CREATEVIEW (TOK_TABNAME union_top_view) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 0 value)) (TOK_WHERE (== (% (TOK_TABLE_OR_COL key) 3) 0)) (TOK_LIMIT 3))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 value)) (TOK_WHERE (== (% (TOK_TABLE_OR_COL key) 3) 1)) (TOK_LIMIT 3)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 2 value)) (TOK_WHERE (== (% (TOK_TABLE_OR_COL key) 3) 2)) (TOK_LIMIT 3)))) _u1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -1422,8 +1265,6 @@ STAGE PLANS: Stage: Stage-0 Create View Operator: Create View - if not exists: false - is alter view as select: false or replace: false columns: key string, value int expanded text: select `src`.`key`, 0 as `value` from `default`.`src` where `src`.`key` % 3 == 0 limit 3 diff --git ql/src/test/results/clientpositive/union_view.q.out ql/src/test/results/clientpositive/union_view.q.out index 20d8414..09fbae5 100644 --- ql/src/test/results/clientpositive/union_view.q.out +++ ql/src/test/results/clientpositive/union_view.q.out @@ -28,9 +28,6 @@ PREHOOK: type: CREATEINDEX POSTHOOK: query: CREATE INDEX src_union_3_key_idx ON TABLE src_union_3(key) AS 'COMPACT' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX POSTHOOK: Output: default@default__src_union_3_src_union_3_key_idx__ -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_union_1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL key) 86) (= (TOK_TABLE_OR_COL ds) '1'))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 @@ -40,27 +37,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - default__src_union_1_src_union_1_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_1_src_union_1_key_idx__ filterExpr: - expr: ((key = 86) and (ds = '1')) - type: boolean Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -74,29 +61,21 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_union_1 + Map Operator Tree: TableScan alias: src_union_1 filterExpr: - expr: ((key = 86) and (ds = '1')) - type: boolean + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -106,9 +85,6 @@ STAGE PLANS: Fetch Operator limit: -1 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_union_2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL key) 86) (= (TOK_TABLE_OR_COL ds) '2'))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 @@ -118,27 +94,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - default__src_union_2_src_union_2_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_2_src_union_2_key_idx__ filterExpr: - expr: ((key = 86) and (ds = '2')) - type: boolean Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -152,29 +118,21 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_union_2 + Map Operator Tree: TableScan alias: src_union_2 filterExpr: - expr: ((key = 86) and (ds = '2')) - type: boolean + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -184,9 +142,6 @@ STAGE PLANS: Fetch Operator limit: -1 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_union_3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL key) 86) (= (TOK_TABLE_OR_COL ds) '3'))))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 @@ -196,27 +151,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - default__src_union_3_src_union_3_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_3_src_union_3_key_idx__ filterExpr: - expr: ((key = 86) and (ds = '3')) - type: boolean Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -230,29 +175,21 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_union_3 + Map Operator Tree: TableScan alias: src_union_3 filterExpr: - expr: ((key = 86) and (ds = '3')) - type: boolean + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -267,9 +204,6 @@ STAGE PLANS: 86 val_86 2 86 val_86 3 86 val_86 3 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_union_1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 @@ -279,23 +213,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - default__src_union_1_src_union_1_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_1_src_union_1_key_idx__ filterExpr: - expr: (ds = '1') - type: boolean Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -309,41 +235,35 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_union_1 + Map Operator Tree: TableScan alias: src_union_1 filterExpr: - expr: (ds = '1') - type: boolean + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -353,9 +273,6 @@ STAGE PLANS: Fetch Operator limit: -1 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_union_2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2')))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 @@ -365,23 +282,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - default__src_union_2_src_union_2_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_2_src_union_2_key_idx__ filterExpr: - expr: (ds = '2') - type: boolean Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -395,41 +304,35 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_union_2 + Map Operator Tree: TableScan alias: src_union_2 filterExpr: - expr: (ds = '2') - type: boolean + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -439,9 +342,6 @@ STAGE PLANS: Fetch Operator limit: -1 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_union_3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '3')))) - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 @@ -451,23 +351,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Map Reduce - Alias -> Map Operator Tree: - default__src_union_3_src_union_3_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_3_src_union_3_key_idx__ filterExpr: - expr: (ds = '3') - type: boolean Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -481,41 +373,35 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_union_3 + Map Operator Tree: TableScan alias: src_union_3 filterExpr: - expr: (ds = '3') - type: boolean + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: COMPLETE Select Operator + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -528,9 +414,6 @@ STAGE PLANS: 500 1000 1000 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_union_view))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL key) 86) (= (TOK_TABLE_OR_COL ds) '1'))))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -540,27 +423,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - default__src_union_1_src_union_1_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_1_src_union_1_key_idx__ filterExpr: - expr: ((key = 86) and (ds = '1')) - type: boolean Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -574,111 +447,27 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_union_view-subquery1-subquery1:subq-subquery1-subquery1:src_union_1 + Map Operator Tree: TableScan alias: src_union_1 filterExpr: - expr: ((key = 86) and (ds = '1')) - type: boolean - Filter Operator - predicate: - expr: (key = 86) - type: boolean - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Union - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - src_union_view-subquery1-subquery2:subq-subquery1-subquery2:src_union_2 - TableScan - alias: src_union_2 - filterExpr: - expr: ((key = 86) and (ds = '1')) - type: boolean + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key = 86) and (ds = '1')) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - src_union_view-subquery2:subq-subquery2:src_union_3 - TableScan - alias: src_union_3 - filterExpr: - expr: ((key = 86) and (ds = '1')) - type: boolean - Filter Operator - predicate: - expr: ((key = 86) and (ds = '1')) - type: boolean - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Union - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -688,9 +477,6 @@ STAGE PLANS: Fetch Operator limit: -1 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_union_view))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL key) 86) (= (TOK_TABLE_OR_COL ds) '2'))))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -700,27 +486,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - default__src_union_2_src_union_2_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_2_src_union_2_key_idx__ filterExpr: - expr: ((key = 86) and (ds = '2')) - type: boolean Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -734,111 +510,27 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_union_view-subquery1-subquery1:subq-subquery1-subquery1:src_union_1 - TableScan - alias: src_union_1 - filterExpr: - expr: ((key = 86) and (ds = '2')) - type: boolean - Filter Operator - predicate: - expr: ((key = 86) and (ds = '2')) - type: boolean - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Union - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - src_union_view-subquery1-subquery2:subq-subquery1-subquery2:src_union_2 + Map Operator Tree: TableScan alias: src_union_2 filterExpr: - expr: ((key = 86) and (ds = '2')) - type: boolean - Filter Operator - predicate: - expr: (key = 86) - type: boolean - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Union - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - src_union_view-subquery2:subq-subquery2:src_union_3 - TableScan - alias: src_union_3 - filterExpr: - expr: ((key = 86) and (ds = '2')) - type: boolean + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((key = 86) and (ds = '2')) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -848,9 +540,6 @@ STAGE PLANS: Fetch Operator limit: -1 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_union_view))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL key) 86) (= (TOK_TABLE_OR_COL ds) '3'))))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -860,27 +549,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - default__src_union_3_src_union_3_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_3_src_union_3_key_idx__ filterExpr: - expr: ((key = 86) and (ds = '3')) - type: boolean Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -894,111 +573,27 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_union_view-subquery1-subquery1:subq-subquery1-subquery1:src_union_1 - TableScan - alias: src_union_1 - filterExpr: - expr: ((key = 86) and (ds = '3')) - type: boolean - Filter Operator - predicate: - expr: ((key = 86) and (ds = '3')) - type: boolean - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Union - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - src_union_view-subquery1-subquery2:subq-subquery1-subquery2:src_union_2 - TableScan - alias: src_union_2 - filterExpr: - expr: ((key = 86) and (ds = '3')) - type: boolean - Filter Operator - predicate: - expr: ((key = 86) and (ds = '3')) - type: boolean - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Union - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - src_union_view-subquery2:subq-subquery2:src_union_3 + Map Operator Tree: TableScan alias: src_union_3 filterExpr: - expr: ((key = 86) and (ds = '3')) - type: boolean + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1008,9 +603,6 @@ STAGE PLANS: Fetch Operator limit: -1 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_union_view))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL key) 86) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL ds)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds))))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -1024,27 +616,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - default__src_union_2_src_union_2_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_2_src_union_2_key_idx__ filterExpr: - expr: (key = 86) - type: boolean Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1058,138 +640,79 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_union_view-subquery1-subquery1:subq-subquery1-subquery1:src_union_1 + Map Operator Tree: TableScan - alias: src_union_1 + alias: src_union_2 filterExpr: - expr: ((key = 86) and ds is not null) - type: boolean + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col2 - type: string + key expressions: _col2 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - src_union_view-subquery1-subquery2:subq-subquery1-subquery2:src_union_2 + Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) TableScan - alias: src_union_2 + alias: src_union_1 filterExpr: - expr: ((key = 86) and ds is not null) - type: boolean + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col2 - type: string + key expressions: _col2 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - src_union_view-subquery2:subq-subquery2:src_union_3 + Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) TableScan alias: src_union_3 filterExpr: - expr: ((key = 86) and ds is not null) - type: boolean + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col2 - type: string + key expressions: _col2 (type: string) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Extract + Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1197,27 +720,17 @@ STAGE PLANS: Stage: Stage-7 Map Reduce - Alias -> Map Operator Tree: - default__src_union_1_src_union_1_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_1_src_union_1_key_idx__ filterExpr: - expr: (key = 86) - type: boolean Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1231,27 +744,17 @@ STAGE PLANS: Stage: Stage-9 Map Reduce - Alias -> Map Operator Tree: - default__src_union_3_src_union_3_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_3_src_union_3_key_idx__ filterExpr: - expr: (key = 86) - type: boolean Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1277,9 +780,6 @@ STAGE PLANS: 86 val_86 2 86 val_86 3 86 val_86 3 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_union_view))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -1289,23 +789,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - default__src_union_1_src_union_1_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_1_src_union_1_key_idx__ filterExpr: - expr: (ds = '1') - type: boolean Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1319,117 +811,41 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_union_view-subquery1-subquery1:subq-subquery1-subquery1:src_union_1 + Map Operator Tree: TableScan alias: src_union_1 filterExpr: - expr: (ds = '1') - type: boolean + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - src_union_view-subquery1-subquery2:subq-subquery1-subquery2:src_union_2 - TableScan - alias: src_union_2 - filterExpr: - expr: (ds = '1') - type: boolean - Filter Operator - predicate: - expr: (ds = '1') - type: boolean - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Union - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - src_union_view-subquery2:subq-subquery2:src_union_3 - TableScan - alias: src_union_3 - filterExpr: - expr: (ds = '1') - type: boolean - Filter Operator - predicate: - expr: (ds = '1') - type: boolean - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Union - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1439,9 +855,6 @@ STAGE PLANS: Fetch Operator limit: -1 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_union_view))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2')))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -1451,23 +864,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - default__src_union_2_src_union_2_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_2_src_union_2_key_idx__ filterExpr: - expr: (ds = '2') - type: boolean Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1481,117 +886,41 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_union_view-subquery1-subquery1:subq-subquery1-subquery1:src_union_1 - TableScan - alias: src_union_1 - filterExpr: - expr: (ds = '2') - type: boolean - Filter Operator - predicate: - expr: (ds = '2') - type: boolean - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Union - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - src_union_view-subquery1-subquery2:subq-subquery1-subquery2:src_union_2 + Map Operator Tree: TableScan alias: src_union_2 filterExpr: - expr: (ds = '2') - type: boolean + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - src_union_view-subquery2:subq-subquery2:src_union_3 - TableScan - alias: src_union_3 - filterExpr: - expr: (ds = '2') - type: boolean - Filter Operator - predicate: - expr: (ds = '2') - type: boolean - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Union - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1601,9 +930,6 @@ STAGE PLANS: Fetch Operator limit: -1 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_union_view))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '3')))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -1613,23 +939,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - default__src_union_3_src_union_3_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_3_src_union_3_key_idx__ filterExpr: - expr: (ds = '3') - type: boolean Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1643,117 +961,41 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_union_view-subquery1-subquery1:subq-subquery1-subquery1:src_union_1 - TableScan - alias: src_union_1 - filterExpr: - expr: (ds = '3') - type: boolean - Filter Operator - predicate: - expr: (ds = '3') - type: boolean - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Union - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - src_union_view-subquery1-subquery2:subq-subquery1-subquery2:src_union_2 - TableScan - alias: src_union_2 - filterExpr: - expr: (ds = '3') - type: boolean - Filter Operator - predicate: - expr: (ds = '3') - type: boolean - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Union - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - src_union_view-subquery2:subq-subquery2:src_union_3 + Map Operator Tree: TableScan alias: src_union_3 filterExpr: - expr: (ds = '3') - type: boolean + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1766,9 +1008,6 @@ STAGE PLANS: 500 1000 1000 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_union_view))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL key) 86) (= (TOK_TABLE_OR_COL ds) '4'))))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -1778,27 +1017,17 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - default__src_union_3_src_union_3_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_3_src_union_3_key_idx__ filterExpr: - expr: ((key = 86) and (ds = '4')) - type: boolean Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1812,111 +1041,27 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_union_view-subquery1-subquery1:subq-subquery1-subquery1:src_union_1 - TableScan - alias: src_union_1 - filterExpr: - expr: ((key = 86) and (ds = '4')) - type: boolean - Filter Operator - predicate: - expr: ((key = 86) and (ds = '4')) - type: boolean - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Union - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - src_union_view-subquery1-subquery2:subq-subquery1-subquery2:src_union_2 - TableScan - alias: src_union_2 - filterExpr: - expr: ((key = 86) and (ds = '4')) - type: boolean - Filter Operator - predicate: - expr: ((key = 86) and (ds = '4')) - type: boolean - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Union - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - src_union_view-subquery2:subq-subquery2:src_union_3 + Map Operator Tree: TableScan alias: src_union_3 filterExpr: - expr: ((key = 86) and (ds = '4')) - type: boolean + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (key = 86) - type: boolean + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1927,9 +1072,6 @@ STAGE PLANS: limit: -1 86 val_86 4 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_union_view))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '4')))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-4 depends on stages: Stage-5 @@ -1939,23 +1081,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-5 Map Reduce - Alias -> Map Operator Tree: - default__src_union_3_src_union_3_key_idx__ + Map Operator Tree: TableScan alias: default__src_union_3_src_union_3_key_idx__ filterExpr: - expr: (ds = '4') - type: boolean Select Operator - expressions: - expr: _bucketname - type: string - expr: _offsets - type: array + expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 File Output Operator compressed: false - GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1969,117 +1103,41 @@ STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - src_union_view-subquery1-subquery1:subq-subquery1-subquery1:src_union_1 - TableScan - alias: src_union_1 - filterExpr: - expr: (ds = '4') - type: boolean - Filter Operator - predicate: - expr: (ds = '4') - type: boolean - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Union - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - src_union_view-subquery1-subquery2:subq-subquery1-subquery2:src_union_2 - TableScan - alias: src_union_2 - filterExpr: - expr: (ds = '4') - type: boolean - Filter Operator - predicate: - expr: (ds = '4') - type: boolean - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string - outputColumnNames: _col0, _col1, _col2 - Union - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - src_union_view-subquery2:subq-subquery2:src_union_3 + Map Operator Tree: TableScan alias: src_union_3 filterExpr: - expr: (ds = '4') - type: boolean + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: key - type: int - expr: value - type: string - expr: ds - type: string + expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(1) - bucketGroup: false + aggregations: count(1) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/vector_left_outer_join.q.out ql/src/test/results/clientpositive/vector_left_outer_join.q.out index 0113bb5..1fa39c0 100644 --- ql/src/test/results/clientpositive/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/vector_left_outer_join.q.out @@ -16,9 +16,6 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME alltypesorc) c) (TOK_TABREF (TOK_TABNAME alltypesorc) cd) (= (. (TOK_TABLE_OR_COL cd) cint) (. (TOK_TABLE_OR_COL c) cint))) (TOK_TABREF (TOK_TABNAME alltypesorc) hd) (= (. (TOK_TABLE_OR_COL hd) ctinyint) (. (TOK_TABLE_OR_COL c) ctinyint)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) ctinyint))))) t1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) - STAGE DEPENDENCIES: Stage-7 is a root stage Stage-2 depends on stages: Stage-7 @@ -38,91 +35,80 @@ STAGE PLANS: t1:cd TableScan alias: cd + Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 {ctinyint} 1 - handleSkewJoin: false keys: - 0 [Column[cint]] - 1 [Column[cint]] - Position of Big Table: 0 + 0 cint (type: int) + 1 cint (type: int) t1:hd TableScan alias: hd + Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[ctinyint]] - Position of Big Table: 0 + 0 _col0 (type: tinyint) + 1 ctinyint (type: tinyint) Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - t1:c + Map Operator Tree: TableScan alias: c + Statistics: Num rows: 47154 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 {ctinyint} 1 - handleSkewJoin: false keys: - 0 [Column[cint]] - 1 [Column[cint]] + 0 cint (type: int) + 1 cint (type: int) outputColumnNames: _col0 - Position of Big Table: 0 - Vectorized execution: true + Statistics: Num rows: 103739 Data size: 414960 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 1 - handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[ctinyint]] - Position of Big Table: 0 - Vectorized execution: true + 0 _col0 (type: tinyint) + 1 ctinyint (type: tinyint) + Statistics: Num rows: 114112 Data size: 456456 Basic stats: COMPLETE Column stats: NONE Select Operator - Vectorized execution: true + Statistics: Num rows: 114112 Data size: 456456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count() - bucketGroup: false + aggregations: count() mode: hash outputColumnNames: _col0 - Vectorized execution: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint + expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/vectorization_div0.q.out ql/src/test/results/clientpositive/vectorization_div0.q.out index 97a3f39..457000f 100644 --- ql/src/test/results/clientpositive/vectorization_div0.q.out +++ ql/src/test/results/clientpositive/vectorization_div0.q.out @@ -6,9 +6,6 @@ POSTHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as explain select cdouble / 0.0 from alltypesorc limit 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (/ (TOK_TABLE_OR_COL cdouble) 0.0))) (TOK_LIMIT 100))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -16,26 +13,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 47154 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (cdouble / 0.0) - type: double + expressions: (cdouble / 0.0) (type: double) outputColumnNames: _col0 - Vectorized execution: true + Statistics: Num rows: 47154 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Limit - Vectorized execution: true + Number of rows: 100 + Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Vectorized execution: true + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -163,9 +159,6 @@ explain select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) from alltypesorc where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (- (TOK_TABLE_OR_COL cbigint) 988888L) s1) (TOK_SELEXPR (/ (TOK_TABLE_OR_COL cdouble) (- (TOK_TABLE_OR_COL cbigint) 988888L)) s2) (TOK_SELEXPR (/ 1.2 (- (TOK_TABLE_OR_COL cbigint) 988888L)))) (TOK_WHERE (and (> (TOK_TABLE_OR_COL cbigint) 0) (< (TOK_TABLE_OR_COL cbigint) 100000000))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL s1)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL s2))) (TOK_LIMIT 100))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -173,47 +166,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 23577 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((cbigint > 0) and (cbigint < 100000000)) - type: boolean - Vectorized execution: true + predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean) + Statistics: Num rows: 2619 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (cbigint - 988888) - type: bigint - expr: (cdouble / (cbigint - 988888)) - type: double - expr: (1.2 / (cbigint - 988888)) - type: double + expressions: (cbigint - 988888) (type: bigint), (cdouble / (cbigint - 988888)) (type: double), (1.2 / (cbigint - 988888)) (type: double) outputColumnNames: _col0, _col1, _col2 - Vectorized execution: true + Statistics: Num rows: 2619 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: bigint - expr: _col1 - type: double + key expressions: _col0 (type: bigint), _col1 (type: double) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: double - expr: _col2 - type: double - Vectorized execution: true + Statistics: Num rows: 2619 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: double), _col2 (type: double) + Execution mode: vectorized Reduce Operator Tree: Extract + Statistics: Num rows: 2619 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -345,9 +323,6 @@ explain select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 1 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) from alltypesorc where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL cdouble) 200.0) s1) (TOK_SELEXPR (/ (TOK_TABLE_OR_COL cbigint) (+ (TOK_TABLE_OR_COL cdouble) 200.0)) s2) (TOK_SELEXPR (/ (+ (TOK_TABLE_OR_COL cdouble) 200.0) (+ (TOK_TABLE_OR_COL cdouble) 200.0))) (TOK_SELEXPR (/ (TOK_TABLE_OR_COL cbigint) (+ (TOK_TABLE_OR_COL cdouble) 200.0))) (TOK_SELEXPR (/ 1 (+ (TOK_TABLE_OR_COL cdouble) 200.0))) (TOK_SELEXPR (/ 1.2 (+ (TOK_TABLE_OR_COL cdouble) 200.0)))) (TOK_WHERE (and (>= (TOK_TABLE_OR_COL cdouble) (- 500)) (< (TOK_TABLE_OR_COL cdouble) (- 199)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL s1)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL s2))) (TOK_LIMIT 100))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -355,59 +330,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 23577 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((cdouble >= (- 500)) and (cdouble < (- 199))) - type: boolean - Vectorized execution: true + predicate: ((cdouble >= (- 500)) and (cdouble < (- 199))) (type: boolean) + Statistics: Num rows: 2619 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (cdouble + 200.0) - type: double - expr: (cbigint / (cdouble + 200.0)) - type: double - expr: ((cdouble + 200.0) / (cdouble + 200.0)) - type: double - expr: (cbigint / (cdouble + 200.0)) - type: double - expr: (1 / (cdouble + 200.0)) - type: double - expr: (1.2 / (cdouble + 200.0)) - type: double + expressions: (cdouble + 200.0) (type: double), (cbigint / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (cbigint / (cdouble + 200.0)) (type: double), (1 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Vectorized execution: true + Statistics: Num rows: 2619 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double - expr: _col1 - type: double + key expressions: _col0 (type: double), _col1 (type: double) sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: double - expr: _col1 - type: double - expr: _col2 - type: double - expr: _col3 - type: double - expr: _col4 - type: double - expr: _col5 - type: double - Vectorized execution: true + Statistics: Num rows: 2619 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double) + Execution mode: vectorized Reduce Operator Tree: Extract + Statistics: Num rows: 2619 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out index 2fd7edd..51a4e81 100644 --- ql/src/test/results/clientpositive/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -3,9 +3,6 @@ PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < PREHOOK: type: QUERY POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL cbigint)) (TOK_SELEXPR (TOK_TABLE_OR_COL cdouble))) (TOK_WHERE (and (< (TOK_TABLE_OR_COL cbigint) (TOK_TABLE_OR_COL cdouble)) (> (TOK_TABLE_OR_COL cint) 0))) (TOK_LIMIT 7))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -13,33 +10,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 18861 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((cbigint < cdouble) and (cint > 0)) - type: boolean - Vectorized execution: true + predicate: ((cbigint < cdouble) and (cint > 0)) (type: boolean) + Statistics: Num rows: 2095 Data size: 41901 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: cbigint - type: bigint - expr: cdouble - type: double + expressions: cbigint (type: bigint), cdouble (type: double) outputColumnNames: _col0, _col1 - Vectorized execution: true + Statistics: Num rows: 2095 Data size: 41901 Basic stats: COMPLETE Column stats: NONE Limit - Vectorized execution: true + Number of rows: 7 + Statistics: Num rows: 7 Data size: 140 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 7 Data size: 140 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Vectorized execution: true + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -71,9 +63,6 @@ POSTHOOK: query: -- HIVE-3562 Some limit can be pushed down to map stage - c/p p explain select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ctinyint)) (TOK_SELEXPR (TOK_TABLE_OR_COL cdouble)) (TOK_SELEXPR (TOK_TABLE_OR_COL csmallint))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL ctinyint))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ctinyint)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cdouble))) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -81,49 +70,33 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 23577 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ctinyint is not null - type: boolean - Vectorized execution: true + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 11789 Data size: 188626 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: ctinyint - type: tinyint - expr: cdouble - type: double - expr: csmallint - type: smallint + expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) outputColumnNames: _col0, _col1, _col2 - Vectorized execution: true + Statistics: Num rows: 11789 Data size: 188626 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: double + key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ - tag: -1 - TopN: 20 + Statistics: Num rows: 11789 Data size: 188626 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: double - expr: _col2 - type: smallint - Vectorized execution: true + value expressions: _col0 (type: tinyint), _col1 (type: double), _col2 (type: smallint) + Execution mode: vectorized Reduce Operator Tree: Extract + Statistics: Num rows: 11789 Data size: 188626 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -169,9 +142,6 @@ POSTHOOK: query: -- deduped RS explain select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ctinyint)) (TOK_SELEXPR (TOK_FUNCTION avg (+ (TOK_TABLE_OR_COL cdouble) 1)))) (TOK_GROUPBY (TOK_TABLE_OR_COL ctinyint)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ctinyint))) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -179,63 +149,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: ctinyint - type: tinyint - expr: cdouble - type: double + expressions: ctinyint (type: tinyint), cdouble (type: double) outputColumnNames: ctinyint, cdouble - Vectorized execution: true + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: avg((cdouble + 1)) - bucketGroup: false - keys: - expr: ctinyint - type: tinyint + aggregations: avg((cdouble + 1)) + keys: ctinyint (type: tinyint) mode: hash outputColumnNames: _col0, _col1 - Vectorized execution: true + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: tinyint + key expressions: _col0 (type: tinyint) sort order: + - Map-reduce partition columns: - expr: _col0 - type: tinyint - tag: -1 - TopN: 20 + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: - expr: _col1 - type: struct + value expressions: _col1 (type: struct) + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: - expr: avg(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: tinyint + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15718 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: double + expressions: _col0 (type: tinyint), _col1 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15718 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -281,9 +233,6 @@ POSTHOOK: query: -- distincts explain select distinct(ctinyint) from alltypesorc limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL ctinyint))) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -291,52 +240,42 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: ctinyint - type: tinyint + expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint - Vectorized execution: true + Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator - bucketGroup: false - keys: - expr: ctinyint - type: tinyint + keys: ctinyint (type: tinyint) mode: hash outputColumnNames: _col0 - Vectorized execution: true + Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: tinyint + key expressions: _col0 (type: tinyint) sort order: + - Map-reduce partition columns: - expr: _col0 - type: tinyint - tag: -1 - TopN: 20 + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 + Execution mode: vectorized Reduce Operator Tree: Group By Operator - bucketGroup: false - keys: - expr: KEY._col0 - type: tinyint + keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 47154 Data size: 188616 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: tinyint + expressions: _col0 (type: tinyint) outputColumnNames: _col0 + Statistics: Num rows: 47154 Data size: 188616 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -380,9 +319,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ctinyint)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL cdouble)))) (TOK_GROUPBY (TOK_TABLE_OR_COL ctinyint)) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -390,67 +326,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: ctinyint - type: tinyint - expr: cdouble - type: double + expressions: ctinyint (type: tinyint), cdouble (type: double) outputColumnNames: ctinyint, cdouble - Vectorized execution: true + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(DISTINCT cdouble) - bucketGroup: false - keys: - expr: ctinyint - type: tinyint - expr: cdouble - type: double + aggregations: count(DISTINCT cdouble) + keys: ctinyint (type: tinyint), cdouble (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 - Vectorized execution: true + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: double + key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: tinyint - tag: -1 - TopN: 20 + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: - expr: _col2 - type: bigint + value expressions: _col2 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: - expr: count(DISTINCT KEY._col1:0._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: tinyint + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 15718 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: bigint + expressions: _col0 (type: tinyint), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 15718 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -496,9 +410,6 @@ POSTHOOK: query: -- limit zero explain select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ctinyint)) (TOK_SELEXPR (TOK_TABLE_OR_COL cdouble))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ctinyint))) (TOK_LIMIT 0))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -506,36 +417,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: ctinyint - type: tinyint - expr: cdouble - type: double + expressions: ctinyint (type: tinyint), cdouble (type: double) outputColumnNames: _col0, _col1 - Vectorized execution: true + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: tinyint + key expressions: _col0 (type: tinyint) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: tinyint - expr: _col1 - type: double - Vectorized execution: true + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: double) + Execution mode: vectorized Reduce Operator Tree: Extract + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -561,9 +465,6 @@ POSTHOOK: query: -- 2MR (applied to last RS) explain select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL cdouble)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL ctinyint)) sum)) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL ctinyint))) (TOK_GROUPBY (TOK_TABLE_OR_COL cdouble)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL sum)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cdouble))) (TOK_LIMIT 20))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -572,65 +473,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ctinyint is not null - type: boolean - Vectorized execution: true + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 15718 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: cdouble - type: double - expr: ctinyint - type: tinyint + expressions: cdouble (type: double), ctinyint (type: tinyint) outputColumnNames: cdouble, ctinyint - Vectorized execution: true + Statistics: Num rows: 15718 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: sum(ctinyint) - bucketGroup: false - keys: - expr: cdouble - type: double + aggregations: sum(ctinyint) + keys: cdouble (type: double) mode: hash outputColumnNames: _col0, _col1 - Vectorized execution: true + Statistics: Num rows: 15718 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - Map-reduce partition columns: - expr: _col0 - type: double - tag: -1 - value expressions: - expr: _col1 - type: bigint + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 15718 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: double + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 7859 Data size: 94309 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: bigint + expressions: _col0 (type: double), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 7859 Data size: 94309 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -638,30 +517,23 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col1 - type: bigint - expr: _col0 - type: double + key expressions: _col1 (type: bigint), _col0 (type: double) sort order: ++ - tag: -1 - TopN: 20 + Statistics: Num rows: 7859 Data size: 94309 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: - expr: _col0 - type: double - expr: _col1 - type: bigint + value expressions: _col0 (type: double), _col1 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 7859 Data size: 94309 Basic stats: COMPLETE Column stats: NONE Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/vectorization_part_project.q.out ql/src/test/results/clientpositive/vectorization_part_project.q.out index 1186f50..faab239 100644 --- ql/src/test/results/clientpositive/vectorization_part_project.q.out +++ ql/src/test/results/clientpositive/vectorization_part_project.q.out @@ -83,9 +83,6 @@ POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring2 SIMPLE [(alltype POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:from deserializer), ] POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:from deserializer), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL cdouble) 2))) (TOK_LIMIT 10))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -93,26 +90,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc_part + Map Operator Tree: TableScan alias: alltypesorc_part + Statistics: Num rows: 200 Data size: 41576 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (cdouble + 2) - type: double + expressions: (cdouble + 2) (type: double) outputColumnNames: _col0 - Vectorized execution: true + Statistics: Num rows: 200 Data size: 41576 Basic stats: COMPLETE Column stats: NONE Limit - Vectorized execution: true + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2070 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 10 Data size: 2070 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Vectorized execution: true + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorization_pushdown.q.out ql/src/test/results/clientpositive/vectorization_pushdown.q.out index 5d1f5f6..4bbb467 100644 --- ql/src/test/results/clientpositive/vectorization_pushdown.q.out +++ ql/src/test/results/clientpositive/vectorization_pushdown.q.out @@ -3,9 +3,6 @@ PREHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdo PREHOOK: type: QUERY POSTHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cbigint)))) (TOK_WHERE (< (TOK_TABLE_OR_COL cbigint) (TOK_TABLE_OR_COL cdouble))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -13,52 +10,41 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc filterExpr: - expr: (cbigint < cdouble) - type: boolean + Statistics: Num rows: 23577 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (cbigint < cdouble) - type: boolean - Vectorized execution: true + predicate: (cbigint < cdouble) (type: boolean) + Statistics: Num rows: 7859 Data size: 125745 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: cbigint - type: bigint + expressions: cbigint (type: bigint) outputColumnNames: cbigint - Vectorized execution: true + Statistics: Num rows: 7859 Data size: 125745 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: avg(cbigint) - bucketGroup: false + aggregations: avg(cbigint) mode: hash outputColumnNames: _col0 - Vectorized execution: true + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct) + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: - expr: avg(VALUE._col0) - bucketGroup: false + aggregations: avg(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double + expressions: _col0 (type: double) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/vectorization_short_regress.q.out ql/src/test/results/clientpositive/vectorization_short_regress.q.out index 99c904a..305d336 100644 --- ql/src/test/results/clientpositive/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/vectorization_short_regress.q.out @@ -131,9 +131,6 @@ WHERE ((762 = cbigint) AND ((79.553 != cint) AND (cboolean2 != cboolean1))))) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (+ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint)) (- 3728))) (TOK_SELEXPR (- (+ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint)) (- 3728)))) (TOK_SELEXPR (- (- (+ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint)) (- 3728))))) (TOK_SELEXPR (* (- (- (+ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint)) (- 3728)))) (+ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint)) (- 3728)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (- (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint)))) (TOK_SELEXPR (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (* (* (- (- (+ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint)) (- 3728)))) (+ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint)) (- 3728))) (- (- (+ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint)) (- 3728)))))) (TOK_SELEXPR (TOK_FUNCTION STDDEV_SAMP (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)))) (TOK_SELEXPR (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)) (- (- (+ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint)) (- 3728)))))) (TOK_SELEXPR (* (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)) (- (- (+ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint)) (- 3728))))) (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)))) (TOK_SELEXPR (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (- 10.175 (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cint)))) (TOK_SELEXPR (- (- 10.175 (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cint))))) (TOK_SELEXPR (/ (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))) (- 563))) (TOK_SELEXPR (TOK_FUNCTION STDDEV_SAMP (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (- (/ (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))) (- 563)))) (TOK_SELEXPR (/ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint)) (TOK_FUNCTION SUM (TOK_TABLE_OR_COL cdouble)))) (TOK_SELEXPR (TOK_FUNCTION MIN (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (TOK_FUNCTION COUNT (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (/ (TOK_FUNCTION MIN (TOK_TABLE_OR_COL ctinyint)) (/ (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))) (- 563)))) (TOK_SELEXPR (- (/ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint)) (TOK_FUNCTION SUM (TOK_TABLE_OR_COL cdouble)))))) (TOK_WHERE (OR (OR (OR (= 762 (TOK_TABLE_OR_COL cbigint)) (AND (< (TOK_TABLE_OR_COL csmallint) (TOK_TABLE_OR_COL cfloat)) (AND (> (TOK_TABLE_OR_COL ctimestamp2) (- 10669)) (!= (TOK_TABLE_OR_COL cdouble) (TOK_TABLE_OR_COL cint))))) (= (TOK_TABLE_OR_COL cstring1) 'a')) (AND (<= (TOK_TABLE_OR_COL cbigint) (- 1.389)) (AND (!= (TOK_TABLE_OR_COL cstring2) 'a') (AND (!= 79.553 (TOK_TABLE_OR_COL cint)) (!= (TOK_TABLE_OR_COL cboolean2) (TOK_TABLE_OR_COL cboolean1))))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -141,137 +138,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 1347 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((762 = cbigint) or ((csmallint < cfloat) and ((ctimestamp2 > (- 10669)) and (cdouble <> cint)))) or (cstring1 = 'a')) or ((cbigint <= (- 1.389)) and ((cstring2 <> 'a') and ((79.553 <> cint) and (cboolean2 <> cboolean1))))) - type: boolean - Vectorized execution: true + predicate: ((((762 = cbigint) or ((csmallint < cfloat) and ((ctimestamp2 > (- 10669)) and (cdouble <> cint)))) or (cstring1 = 'a')) or ((cbigint <= (- 1.389)) and ((cstring2 <> 'a') and ((79.553 <> cint) and (cboolean2 <> cboolean1))))) (type: boolean) + Statistics: Num rows: 1347 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: cint - type: int - expr: cdouble - type: double - expr: csmallint - type: smallint - expr: cfloat - type: float - expr: ctinyint - type: tinyint + expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint) outputColumnNames: cint, cdouble, csmallint, cfloat, ctinyint - Vectorized execution: true + Statistics: Num rows: 1347 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: avg(cint) - expr: sum(cdouble) - expr: stddev_pop(cint) - expr: stddev_samp(csmallint) - expr: var_samp(cint) - expr: avg(cfloat) - expr: stddev_samp(cint) - expr: min(ctinyint) - expr: count(csmallint) - bucketGroup: false + aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Vectorized execution: true + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct - expr: _col1 - type: double - expr: _col2 - type: struct - expr: _col3 - type: struct - expr: _col4 - type: struct - expr: _col5 - type: struct - expr: _col6 - type: struct - expr: _col7 - type: tinyint - expr: _col8 - type: bigint + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: - expr: avg(VALUE._col0) - expr: sum(VALUE._col1) - expr: stddev_pop(VALUE._col2) - expr: stddev_samp(VALUE._col3) - expr: var_samp(VALUE._col4) - expr: avg(VALUE._col5) - expr: stddev_samp(VALUE._col6) - expr: min(VALUE._col7) - expr: count(VALUE._col8) - bucketGroup: false + aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: (_col0 + (- 3728)) - type: double - expr: (- (_col0 + (- 3728))) - type: double - expr: (- (- (_col0 + (- 3728)))) - type: double - expr: ((- (- (_col0 + (- 3728)))) * (_col0 + (- 3728))) - type: double - expr: _col1 - type: double - expr: (- _col0) - type: double - expr: _col2 - type: double - expr: (((- (- (_col0 + (- 3728)))) * (_col0 + (- 3728))) * (- (- (_col0 + (- 3728))))) - type: double - expr: _col3 - type: double - expr: (- _col2) - type: double - expr: (_col2 - (- (- (_col0 + (- 3728))))) - type: double - expr: ((_col2 - (- (- (_col0 + (- 3728))))) * _col2) - type: double - expr: _col4 - type: double - expr: _col5 - type: double - expr: (10.175 - _col4) - type: double - expr: (- (10.175 - _col4)) - type: double - expr: ((- _col2) / (- 563)) - type: double - expr: _col6 - type: double - expr: (- ((- _col2) / (- 563))) - type: double - expr: (_col0 / _col1) - type: double - expr: _col7 - type: tinyint - expr: _col8 - type: bigint - expr: (_col7 / ((- _col2) / (- 563))) - type: double - expr: (- (_col0 / _col1)) - type: double + expressions: _col0 (type: double), (_col0 + (- 3728)) (type: double), (- (_col0 + (- 3728))) (type: double), (- (- (_col0 + (- 3728)))) (type: double), ((- (- (_col0 + (- 3728)))) * (_col0 + (- 3728))) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + (- 3728)))) * (_col0 + (- 3728))) * (- (- (_col0 + (- 3728))))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + (- 3728))))) (type: double), ((_col2 - (- (- (_col0 + (- 3728))))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / (- 563)) (type: double), _col6 (type: double), (- ((- _col2) / (- 563))) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (_col7 / ((- _col2) / (- 563))) (type: double), (- (_col0 / _col1)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -435,9 +335,6 @@ WHERE (((cbigint <= 197) OR ((cfloat > 79.553) AND (cstring2 LIKE '10%'))) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (/ (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cint)) (- 3728))) (TOK_SELEXPR (* (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cint)) (- 3728))) (TOK_SELEXPR (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (- (* (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cint)) (- 3728)))) (TOK_SELEXPR (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (% (- 563) (* (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cint)) (- 3728)))) (TOK_SELEXPR (/ (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cbigint)) (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL csmallint)))) (TOK_SELEXPR (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL csmallint)))) (TOK_SELEXPR (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION AVG (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL csmallint)) 10.175)) (TOK_SELEXPR (TOK_FUNCTION MIN (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (% (* (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cint)) (- 3728)) (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL csmallint)) 10.175))) (TOK_SELEXPR (- (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cdouble)))) (TOK_SELEXPR (TOK_FUNCTION MIN (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (% (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cdouble)) (- 26.28))) (TOK_SELEXPR (TOK_FUNCTION STDDEV_SAMP (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (- (/ (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cint)) (- 3728)))) (TOK_SELEXPR (% (- (* (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cint)) (- 3728))) (% (- 563) (* (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cint)) (- 3728))))) (TOK_SELEXPR (- (/ (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cint)) (- 3728)) (TOK_FUNCTION AVG (TOK_TABLE_OR_COL ctinyint)))) (TOK_SELEXPR (- (* (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cint)) (- 3728)))) (TOK_SELEXPR (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cint)))) (TOK_WHERE (OR (OR (OR (AND (<= (TOK_TABLE_OR_COL cbigint) 197) (< (TOK_TABLE_OR_COL cint) (TOK_TABLE_OR_COL cbigint))) (AND (>= (TOK_TABLE_OR_COL cdouble) (- 26.28)) (> (TOK_TABLE_OR_COL csmallint) (TOK_TABLE_OR_COL cdouble)))) (AND (> (TOK_TABLE_OR_COL ctinyint) (TOK_TABLE_OR_COL cfloat)) (RLIKE (TOK_TABLE_OR_COL cstring1) '.*ss.*'))) (AND (> (TOK_TABLE_OR_COL cfloat) 79.553) (LIKE (TOK_TABLE_OR_COL cstring2) '10%')))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -445,133 +342,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 1626 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((cbigint <= 197) and (cint < cbigint)) or ((cdouble >= (- 26.28)) and (csmallint > cdouble))) or ((ctinyint > cfloat) and (cstring1 rlike '.*ss.*'))) or ((cfloat > 79.553) and (cstring2 like '10%'))) - type: boolean - Vectorized execution: true + predicate: (((((cbigint <= 197) and (cint < cbigint)) or ((cdouble >= (- 26.28)) and (csmallint > cdouble))) or ((ctinyint > cfloat) and (cstring1 rlike '.*ss.*'))) or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean) + Statistics: Num rows: 902 Data size: 209266 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: cint - type: int - expr: cbigint - type: bigint - expr: csmallint - type: smallint - expr: cdouble - type: double - expr: ctinyint - type: tinyint + expressions: cint (type: int), cbigint (type: bigint), csmallint (type: smallint), cdouble (type: double), ctinyint (type: tinyint) outputColumnNames: cint, cbigint, csmallint, cdouble, ctinyint - Vectorized execution: true + Statistics: Num rows: 902 Data size: 209266 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: max(cint) - expr: var_pop(cbigint) - expr: stddev_pop(csmallint) - expr: max(cdouble) - expr: avg(ctinyint) - expr: min(cint) - expr: min(cdouble) - expr: stddev_samp(csmallint) - expr: var_samp(cint) - bucketGroup: false + aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Vectorized execution: true + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: struct - expr: _col2 - type: struct - expr: _col3 - type: double - expr: _col4 - type: struct - expr: _col5 - type: int - expr: _col6 - type: double - expr: _col7 - type: struct - expr: _col8 - type: struct + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: int), _col6 (type: double), _col7 (type: struct), _col8 (type: struct) + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - expr: var_pop(VALUE._col1) - expr: stddev_pop(VALUE._col2) - expr: max(VALUE._col3) - expr: avg(VALUE._col4) - expr: min(VALUE._col5) - expr: min(VALUE._col6) - expr: stddev_samp(VALUE._col7) - expr: var_samp(VALUE._col8) - bucketGroup: false + aggregations: max(VALUE._col0), var_pop(VALUE._col1), stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: int - expr: (_col0 / (- 3728)) - type: decimal(16,6) - expr: (_col0 * (- 3728)) - type: int - expr: _col1 - type: double - expr: (- (_col0 * (- 3728))) - type: int - expr: _col2 - type: double - expr: ((- 563) % (_col0 * (- 3728))) - type: int - expr: (_col1 / _col2) - type: double - expr: (- _col2) - type: double - expr: _col3 - type: double - expr: _col4 - type: double - expr: (_col2 - 10.175) - type: double - expr: _col5 - type: int - expr: ((_col0 * (- 3728)) % (_col2 - 10.175)) - type: double - expr: (- _col3) - type: double - expr: _col6 - type: double - expr: (_col3 % (- 26.28)) - type: double - expr: _col7 - type: double - expr: (- (_col0 / (- 3728))) - type: decimal(16,6) - expr: ((- (_col0 * (- 3728))) % ((- 563) % (_col0 * (- 3728)))) - type: int - expr: ((_col0 / (- 3728)) - _col4) - type: double - expr: (- (_col0 * (- 3728))) - type: int - expr: _col8 - type: double + expressions: _col0 (type: int), (_col0 / (- 3728)) (type: decimal(16,6)), (_col0 * (- 3728)) (type: int), _col1 (type: double), (- (_col0 * (- 3728))) (type: int), _col2 (type: double), ((- 563) % (_col0 * (- 3728))) (type: int), (_col1 / _col2) (type: double), (- _col2) (type: double), _col3 (type: double), _col4 (type: double), (_col2 - 10.175) (type: double), _col5 (type: int), ((_col0 * (- 3728)) % (_col2 - 10.175)) (type: double), (- _col3) (type: double), _col6 (type: double), (_col3 % (- 26.28)) (type: double), _col7 (type: double), (- (_col0 / (- 3728))) (type: decimal(16,6)), ((- (_col0 * (- 3728))) % ((- 563) % (_col0 * (- 3728)))) (type: int), ((_col0 / (- 3728)) - _col4) (type: double), (- (_col0 * (- 3728))) (type: int), _col8 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -726,9 +530,6 @@ WHERE ((ctimestamp1 = ctimestamp2) AND ((ctimestamp2 IS NOT NULL) AND (cstring2 > 'a')))) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (- (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cbigint)))) (TOK_SELEXPR (- (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cbigint)) (- (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cbigint))))) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT)) (TOK_SELEXPR (% (TOK_FUNCTIONSTAR COUNT) 79.553)) (TOK_SELEXPR (TOK_FUNCTION MAX (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (- (TOK_FUNCTIONSTAR COUNT) (- (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cbigint))))) (TOK_SELEXPR (- (- (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cbigint))))) (TOK_SELEXPR (% (- 1) (- (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cbigint))))) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT)) (TOK_SELEXPR (- (TOK_FUNCTIONSTAR COUNT))) (TOK_SELEXPR (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (- (- (- (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cbigint)))))) (TOK_SELEXPR (* 762 (- (TOK_FUNCTIONSTAR COUNT)))) (TOK_SELEXPR (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (+ (TOK_FUNCTION MAX (TOK_TABLE_OR_COL ctinyint)) (* 762 (- (TOK_FUNCTIONSTAR COUNT))))) (TOK_SELEXPR (+ (- (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cbigint))) (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cint)))) (TOK_SELEXPR (TOK_FUNCTION STDDEV_SAMP (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (% (- (TOK_FUNCTIONSTAR COUNT)) (TOK_FUNCTIONSTAR COUNT))) (TOK_SELEXPR (TOK_FUNCTION COUNT (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (TOK_FUNCTION AVG (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (% (- 3728) (+ (TOK_FUNCTION MAX (TOK_TABLE_OR_COL ctinyint)) (* 762 (- (TOK_FUNCTIONSTAR COUNT))))))) (TOK_WHERE (OR (OR (OR (OR (= (TOK_TABLE_OR_COL ctimestamp1) (TOK_TABLE_OR_COL ctimestamp2)) (= 762 (TOK_TABLE_OR_COL cfloat))) (= (TOK_TABLE_OR_COL cstring1) 'ss')) (AND (<= (TOK_TABLE_OR_COL csmallint) (TOK_TABLE_OR_COL cbigint)) (= 1 (TOK_TABLE_OR_COL cboolean2)))) (AND (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL cboolean1)) (AND (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL ctimestamp2)) (> (TOK_TABLE_OR_COL cstring2) 'a'))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -736,127 +537,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 1178 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((ctimestamp1 = ctimestamp2) or (762.0 = cfloat)) or (cstring1 = 'ss')) or ((csmallint <= cbigint) and (1 = cboolean2))) or (cboolean1 is not null and (ctimestamp2 is not null and (cstring2 > 'a')))) - type: boolean - Vectorized execution: true + predicate: (((((ctimestamp1 = ctimestamp2) or (762.0 = cfloat)) or (cstring1 = 'ss')) or ((csmallint <= cbigint) and (1 = cboolean2))) or (cboolean1 is not null and (ctimestamp2 is not null and (cstring2 > 'a')))) (type: boolean) + Statistics: Num rows: 1178 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: cbigint - type: bigint - expr: ctinyint - type: tinyint - expr: csmallint - type: smallint - expr: cint - type: int - expr: cdouble - type: double + expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double) outputColumnNames: cbigint, ctinyint, csmallint, cint, cdouble - Vectorized execution: true + Statistics: Num rows: 1178 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: var_pop(cbigint) - expr: count() - expr: max(ctinyint) - expr: stddev_pop(csmallint) - expr: max(cint) - expr: stddev_samp(cdouble) - expr: count(ctinyint) - expr: avg(ctinyint) - bucketGroup: false + aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Vectorized execution: true + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct - expr: _col1 - type: bigint - expr: _col2 - type: tinyint - expr: _col3 - type: struct - expr: _col4 - type: int - expr: _col5 - type: struct - expr: _col6 - type: bigint - expr: _col7 - type: struct + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: struct), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct) + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: - expr: var_pop(VALUE._col0) - expr: count(VALUE._col1) - expr: max(VALUE._col2) - expr: stddev_pop(VALUE._col3) - expr: max(VALUE._col4) - expr: stddev_samp(VALUE._col5) - expr: count(VALUE._col6) - expr: avg(VALUE._col7) - bucketGroup: false + aggregations: var_pop(VALUE._col0), count(VALUE._col1), max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: (- _col0) - type: double - expr: (_col0 - (- _col0)) - type: double - expr: _col1 - type: bigint - expr: (_col1 % 79.553) - type: double - expr: _col2 - type: tinyint - expr: (_col1 - (- _col0)) - type: double - expr: (- (- _col0)) - type: double - expr: ((- 1) % (- _col0)) - type: double - expr: _col1 - type: bigint - expr: (- _col1) - type: bigint - expr: _col3 - type: double - expr: (- (- (- _col0))) - type: double - expr: (762 * (- _col1)) - type: bigint - expr: _col4 - type: int - expr: (_col2 + (762 * (- _col1))) - type: bigint - expr: ((- _col0) + _col4) - type: double - expr: _col5 - type: double - expr: ((- _col1) % _col1) - type: bigint - expr: _col6 - type: bigint - expr: _col7 - type: double - expr: ((- 3728) % (_col2 + (762 * (- _col1)))) - type: bigint + expressions: _col0 (type: double), (- _col0) (type: double), (_col0 - (- _col0)) (type: double), _col1 (type: bigint), (_col1 % 79.553) (type: double), _col2 (type: tinyint), (_col1 - (- _col0)) (type: double), (- (- _col0)) (type: double), ((- 1) % (- _col0)) (type: double), _col1 (type: bigint), (- _col1) (type: bigint), _col3 (type: double), (- (- (- _col0))) (type: double), (762 * (- _col1)) (type: bigint), _col4 (type: int), (_col2 + (762 * (- _col1))) (type: bigint), ((- _col0) + _col4) (type: double), _col5 (type: double), ((- _col1) % _col1) (type: bigint), _col6 (type: bigint), _col7 (type: double), ((- 3728) % (_col2 + (762 * (- _col1)))) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -990,9 +704,6 @@ WHERE (((ctimestamp2 <= ctimestamp1) AND (ctimestamp1 >= 0)) OR (cfloat = 17)) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION AVG (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (+ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL ctinyint)) 6981)) (TOK_SELEXPR (+ (+ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL ctinyint)) 6981) (TOK_FUNCTION AVG (TOK_TABLE_OR_COL ctinyint)))) (TOK_SELEXPR (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (/ (+ (+ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL ctinyint)) 6981) (TOK_FUNCTION AVG (TOK_TABLE_OR_COL ctinyint))) (TOK_FUNCTION AVG (TOK_TABLE_OR_COL ctinyint)))) (TOK_SELEXPR (- (+ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL ctinyint)) 6981))) (TOK_SELEXPR (TOK_FUNCTION STDDEV_SAMP (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (% (TOK_FUNCTION AVG (TOK_TABLE_OR_COL ctinyint)) (- (+ (TOK_FUNCTION AVG (TOK_TABLE_OR_COL ctinyint)) 6981)))) (TOK_SELEXPR (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (- (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cbigint)))) (TOK_SELEXPR (/ (- (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cbigint))) (TOK_FUNCTION STDDEV_SAMP (TOK_TABLE_OR_COL cint)))) (TOK_SELEXPR (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (* (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cbigint)) (- 26.28)))) (TOK_WHERE (OR (OR (AND (<= (TOK_TABLE_OR_COL ctimestamp2) (TOK_TABLE_OR_COL ctimestamp1)) (AND (!= (TOK_TABLE_OR_COL cbigint) (TOK_TABLE_OR_COL cdouble)) (<= 'ss' (TOK_TABLE_OR_COL cstring1)))) (AND (< (TOK_TABLE_OR_COL csmallint) (TOK_TABLE_OR_COL ctinyint)) (>= (TOK_TABLE_OR_COL ctimestamp1) 0))) (= (TOK_TABLE_OR_COL cfloat) 17))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1000,101 +711,40 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 1779 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((ctimestamp2 <= ctimestamp1) and ((cbigint <> cdouble) and ('ss' <= cstring1))) or ((csmallint < ctinyint) and (ctimestamp1 >= 0))) or (cfloat = 17.0)) - type: boolean - Vectorized execution: true + predicate: ((((ctimestamp2 <= ctimestamp1) and ((cbigint <> cdouble) and ('ss' <= cstring1))) or ((csmallint < ctinyint) and (ctimestamp1 >= 0))) or (cfloat = 17.0)) (type: boolean) + Statistics: Num rows: 1283 Data size: 272060 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: ctinyint - type: tinyint - expr: cbigint - type: bigint - expr: cint - type: int - expr: cfloat - type: float + expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float) outputColumnNames: ctinyint, cbigint, cint, cfloat - Vectorized execution: true + Statistics: Num rows: 1283 Data size: 272060 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: avg(ctinyint) - expr: max(cbigint) - expr: stddev_samp(cint) - expr: var_pop(cint) - expr: var_pop(cbigint) - expr: max(cfloat) - bucketGroup: false + aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Vectorized execution: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: struct - expr: _col1 - type: bigint - expr: _col2 - type: struct - expr: _col3 - type: struct - expr: _col4 - type: struct - expr: _col5 - type: float + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: float) + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: - expr: avg(VALUE._col0) - expr: max(VALUE._col1) - expr: stddev_samp(VALUE._col2) - expr: var_pop(VALUE._col3) - expr: var_pop(VALUE._col4) - expr: max(VALUE._col5) - bucketGroup: false + aggregations: avg(VALUE._col0), max(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), max(VALUE._col5) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: (_col0 + 6981) - type: double - expr: ((_col0 + 6981) + _col0) - type: double - expr: _col1 - type: bigint - expr: (((_col0 + 6981) + _col0) / _col0) - type: double - expr: (- (_col0 + 6981)) - type: double - expr: _col2 - type: double - expr: (_col0 % (- (_col0 + 6981))) - type: double - expr: _col3 - type: double - expr: _col4 - type: double - expr: (- _col1) - type: bigint - expr: ((- _col1) / _col2) - type: double - expr: _col5 - type: float - expr: (_col4 * (- 26.28)) - type: double + expressions: _col0 (type: double), (_col0 + 6981) (type: double), ((_col0 + 6981) + _col0) (type: double), _col1 (type: bigint), (((_col0 + 6981) + _col0) / _col0) (type: double), (- (_col0 + 6981)) (type: double), _col2 (type: double), (_col0 % (- (_col0 + 6981))) (type: double), _col3 (type: double), _col4 (type: double), (- _col1) (type: bigint), ((- _col1) / _col2) (type: double), _col5 (type: float), (_col4 * (- 26.28)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1232,9 +882,6 @@ WHERE (((cstring1 RLIKE 'a.*') OR ((cint < cbigint) AND (ctinyint > cbigint))) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL cint)) (TOK_SELEXPR (TOK_TABLE_OR_COL cdouble)) (TOK_SELEXPR (TOK_TABLE_OR_COL ctimestamp2)) (TOK_SELEXPR (TOK_TABLE_OR_COL cstring1)) (TOK_SELEXPR (TOK_TABLE_OR_COL cboolean2)) (TOK_SELEXPR (TOK_TABLE_OR_COL ctinyint)) (TOK_SELEXPR (TOK_TABLE_OR_COL cfloat)) (TOK_SELEXPR (TOK_TABLE_OR_COL ctimestamp1)) (TOK_SELEXPR (TOK_TABLE_OR_COL csmallint)) (TOK_SELEXPR (TOK_TABLE_OR_COL cbigint)) (TOK_SELEXPR (* (- 3728) (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (- (- 863.257) (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL csmallint) (- (TOK_TABLE_OR_COL csmallint)))) (TOK_SELEXPR (+ (- (TOK_TABLE_OR_COL csmallint) (- (TOK_TABLE_OR_COL csmallint))) (- (TOK_TABLE_OR_COL csmallint)))) (TOK_SELEXPR (/ (TOK_TABLE_OR_COL cint) (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (- (- (- 863.257) (TOK_TABLE_OR_COL cint)) (- 26.28))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (* (TOK_TABLE_OR_COL cdouble) (- 89010))) (TOK_SELEXPR (/ (TOK_TABLE_OR_COL ctinyint) 988888)) (TOK_SELEXPR (- (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (/ 79.553 (TOK_TABLE_OR_COL ctinyint)))) (TOK_WHERE (OR (OR (OR (AND (RLIKE (TOK_TABLE_OR_COL cstring1) 'a.*') (LIKE (TOK_TABLE_OR_COL cstring2) '%ss%')) (AND (!= 1 (TOK_TABLE_OR_COL cboolean2)) (AND (< (TOK_TABLE_OR_COL csmallint) 79.553) (!= (- 257) (TOK_TABLE_OR_COL ctinyint))))) (AND (> (TOK_TABLE_OR_COL cdouble) (TOK_TABLE_OR_COL ctinyint)) (>= (TOK_TABLE_OR_COL cfloat) (TOK_TABLE_OR_COL cint)))) (AND (< (TOK_TABLE_OR_COL cint) (TOK_TABLE_OR_COL cbigint)) (> (TOK_TABLE_OR_COL ctinyint) (TOK_TABLE_OR_COL cbigint))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -1242,66 +889,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 1193 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((cstring1 rlike 'a.*') and (cstring2 like '%ss%')) or ((1 <> cboolean2) and ((csmallint < 79.553) and ((- 257) <> ctinyint)))) or ((cdouble > ctinyint) and (cfloat >= cint))) or ((cint < cbigint) and (ctinyint > cbigint))) - type: boolean + predicate: (((((cstring1 rlike 'a.*') and (cstring2 like '%ss%')) or ((1 <> cboolean2) and ((csmallint < 79.553) and ((- 257) <> ctinyint)))) or ((cdouble > ctinyint) and (cfloat >= cint))) or ((cint < cbigint) and (ctinyint > cbigint))) (type: boolean) + Statistics: Num rows: 959 Data size: 303244 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: cint - type: int - expr: cdouble - type: double - expr: ctimestamp2 - type: timestamp - expr: cstring1 - type: string - expr: cboolean2 - type: boolean - expr: ctinyint - type: tinyint - expr: cfloat - type: float - expr: ctimestamp1 - type: timestamp - expr: csmallint - type: smallint - expr: cbigint - type: bigint - expr: ((- 3728) * cbigint) - type: bigint - expr: (- cint) - type: int - expr: ((- 863.257) - cint) - type: double - expr: (- csmallint) - type: smallint - expr: (csmallint - (- csmallint)) - type: smallint - expr: ((csmallint - (- csmallint)) + (- csmallint)) - type: smallint - expr: (cint / cint) - type: decimal(21,11) - expr: (((- 863.257) - cint) - (- 26.28)) - type: double - expr: (- cfloat) - type: float - expr: (cdouble * (- 89010)) - type: double - expr: (ctinyint / 988888) - type: decimal(10,7) - expr: (- ctinyint) - type: tinyint - expr: (79.553 / ctinyint) - type: double + expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), ((- 3728) * cbigint) (type: bigint), (- cint) (type: int), ((- 863.257) - cint) (type: double), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (cint / cint) (type: decimal(21,11)), (((- 863.257) - cint) - (- 26.28)) (type: double), (- cfloat) (type: float), (cdouble * (- 89010)) (type: double), (ctinyint / 988888) (type: decimal(10,7)), (- ctinyint) (type: tinyint), (79.553 / ctinyint) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Statistics: Num rows: 959 Data size: 303244 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 959 Data size: 303244 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2690,9 +2291,6 @@ WHERE (((197 > ctinyint) AND ((0 >= ctimestamp2) AND (2563.58 < csmallint)))) POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL cint)) (TOK_SELEXPR (TOK_TABLE_OR_COL cbigint)) (TOK_SELEXPR (TOK_TABLE_OR_COL cstring1)) (TOK_SELEXPR (TOK_TABLE_OR_COL cboolean1)) (TOK_SELEXPR (TOK_TABLE_OR_COL cfloat)) (TOK_SELEXPR (TOK_TABLE_OR_COL cdouble)) (TOK_SELEXPR (TOK_TABLE_OR_COL ctimestamp2)) (TOK_SELEXPR (TOK_TABLE_OR_COL csmallint)) (TOK_SELEXPR (TOK_TABLE_OR_COL cstring2)) (TOK_SELEXPR (TOK_TABLE_OR_COL cboolean2)) (TOK_SELEXPR (/ (TOK_TABLE_OR_COL cint) (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (% (TOK_TABLE_OR_COL cbigint) 79.553)) (TOK_SELEXPR (- (/ (TOK_TABLE_OR_COL cint) (TOK_TABLE_OR_COL cbigint)))) (TOK_SELEXPR (% 10.175 (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL cfloat) (- (TOK_TABLE_OR_COL cfloat)))) (TOK_SELEXPR (% (- (TOK_TABLE_OR_COL cfloat) (- (TOK_TABLE_OR_COL cfloat))) (- 6432))) (TOK_SELEXPR (* (TOK_TABLE_OR_COL cdouble) (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL cfloat) (/ (TOK_TABLE_OR_COL cint) (TOK_TABLE_OR_COL cbigint)))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (% 3569 (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (- 359 (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL csmallint)))) (TOK_WHERE (OR (OR (OR (OR (AND (> 197 (TOK_TABLE_OR_COL ctinyint)) (= (TOK_TABLE_OR_COL cint) (TOK_TABLE_OR_COL cbigint))) (= (TOK_TABLE_OR_COL cbigint) 359)) (< (TOK_TABLE_OR_COL cboolean1) 0)) (AND (LIKE (TOK_TABLE_OR_COL cstring1) '%ss') (<= (TOK_TABLE_OR_COL cfloat) (TOK_TABLE_OR_COL ctinyint)))) (AND (!= 16558 (TOK_TABLE_OR_COL ctimestamp1)) (AND (>= 0 (TOK_TABLE_OR_COL ctimestamp2)) (< 2563.58 (TOK_TABLE_OR_COL csmallint)))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2700,70 +2298,20 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 1178 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((((197.0 > ctinyint) and (cint = cbigint)) or (cbigint = 359)) or (cboolean1 < 0)) or ((cstring1 like '%ss') and (cfloat <= ctinyint))) or ((16558 <> ctimestamp1) and ((0 >= ctimestamp2) and (2563.58 < csmallint)))) - type: boolean + predicate: ((((((197.0 > ctinyint) and (cint = cbigint)) or (cbigint = 359)) or (cboolean1 < 0)) or ((cstring1 like '%ss') and (cfloat <= ctinyint))) or ((16558 <> ctimestamp1) and ((0 >= ctimestamp2) and (2563.58 < csmallint)))) (type: boolean) + Statistics: Num rows: 1178 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: cint - type: int - expr: cbigint - type: bigint - expr: cstring1 - type: string - expr: cboolean1 - type: boolean - expr: cfloat - type: float - expr: cdouble - type: double - expr: ctimestamp2 - type: timestamp - expr: csmallint - type: smallint - expr: cstring2 - type: string - expr: cboolean2 - type: boolean - expr: (cint / cbigint) - type: decimal(30,20) - expr: (cbigint % 79.553) - type: double - expr: (- (cint / cbigint)) - type: decimal(30,20) - expr: (10.175 % cfloat) - type: double - expr: (- cfloat) - type: float - expr: (cfloat - (- cfloat)) - type: double - expr: ((cfloat - (- cfloat)) % (- 6432)) - type: double - expr: (cdouble * csmallint) - type: double - expr: (- cdouble) - type: double - expr: (- cbigint) - type: bigint - expr: (cfloat - (cint / cbigint)) - type: double - expr: (- csmallint) - type: smallint - expr: (3569 % cbigint) - type: bigint - expr: (359 - cdouble) - type: double - expr: (- csmallint) - type: smallint + expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (cint / cbigint) (type: decimal(30,20)), (cbigint % 79.553) (type: double), (- (cint / cbigint)) (type: decimal(30,20)), (10.175 % cfloat) (type: double), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: double), ((cfloat - (- cfloat)) % (- 6432)) (type: double), (cdouble * csmallint) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (cfloat - (cint / cbigint)) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359 - cdouble) (type: double), (- csmallint) (type: smallint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 1178 Data size: 377237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1178 Data size: 377237 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2928,9 +2476,6 @@ WHERE (((csmallint > -26.28) AND (-26.28 <= csmallint))) ORDER BY cboolean1, cstring1, ctimestamp2, cfloat, cbigint, cstring1, cdouble, cint, csmallint, cdouble POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL cint)) (TOK_SELEXPR (TOK_TABLE_OR_COL cstring1)) (TOK_SELEXPR (TOK_TABLE_OR_COL cboolean2)) (TOK_SELEXPR (TOK_TABLE_OR_COL ctimestamp2)) (TOK_SELEXPR (TOK_TABLE_OR_COL cdouble)) (TOK_SELEXPR (TOK_TABLE_OR_COL cfloat)) (TOK_SELEXPR (TOK_TABLE_OR_COL cbigint)) (TOK_SELEXPR (TOK_TABLE_OR_COL csmallint)) (TOK_SELEXPR (TOK_TABLE_OR_COL cboolean1)) (TOK_SELEXPR (+ (TOK_TABLE_OR_COL cint) (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL cbigint) (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (+ (- (TOK_TABLE_OR_COL cbigint) (TOK_TABLE_OR_COL ctinyint)) (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (/ (TOK_TABLE_OR_COL cdouble) (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (* (+ (TOK_TABLE_OR_COL cint) (TOK_TABLE_OR_COL csmallint)) (- (TOK_TABLE_OR_COL cbigint)))) (TOK_SELEXPR (+ (- (TOK_TABLE_OR_COL cdouble)) (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (/ (- 1.389) (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (% (TOK_TABLE_OR_COL cbigint) (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (+ (TOK_TABLE_OR_COL csmallint) (+ (TOK_TABLE_OR_COL cint) (TOK_TABLE_OR_COL csmallint))))) (TOK_WHERE (OR (OR (OR (AND (> (TOK_TABLE_OR_COL csmallint) (- 26.28)) (LIKE (TOK_TABLE_OR_COL cstring2) 'ss')) (AND (<= (TOK_TABLE_OR_COL cdouble) (TOK_TABLE_OR_COL cbigint)) (AND (>= (TOK_TABLE_OR_COL cstring1) 'ss') (!= (TOK_TABLE_OR_COL cint) (TOK_TABLE_OR_COL cdouble))))) (= (TOK_TABLE_OR_COL ctinyint) (- 89010))) (AND (<= (TOK_TABLE_OR_COL cbigint) (TOK_TABLE_OR_COL cfloat)) (<= (- 26.28) (TOK_TABLE_OR_COL csmallint))))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cboolean1)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cstring1)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ctimestamp2)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cfloat)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cbigint)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cstring1)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cdouble)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cint)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL csmallint)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cdouble))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -2938,138 +2483,29 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 1347 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((csmallint > (- 26.28)) and (cstring2 like 'ss')) or ((cdouble <= cbigint) and ((cstring1 >= 'ss') and (cint <> cdouble)))) or (ctinyint = (- 89010))) or ((cbigint <= cfloat) and ((- 26.28) <= csmallint))) - type: boolean - Vectorized execution: true + predicate: (((((csmallint > (- 26.28)) and (cstring2 like 'ss')) or ((cdouble <= cbigint) and ((cstring1 >= 'ss') and (cint <> cdouble)))) or (ctinyint = (- 89010))) or ((cbigint <= cfloat) and ((- 26.28) <= csmallint))) (type: boolean) + Statistics: Num rows: 1195 Data size: 334668 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: cint - type: int - expr: cstring1 - type: string - expr: cboolean2 - type: boolean - expr: ctimestamp2 - type: timestamp - expr: cdouble - type: double - expr: cfloat - type: float - expr: cbigint - type: bigint - expr: csmallint - type: smallint - expr: cboolean1 - type: boolean - expr: (cint + csmallint) - type: int - expr: (cbigint - ctinyint) - type: bigint - expr: (- cbigint) - type: bigint - expr: (- cfloat) - type: float - expr: ((cbigint - ctinyint) + cbigint) - type: bigint - expr: (cdouble / cdouble) - type: double - expr: (- cdouble) - type: double - expr: ((cint + csmallint) * (- cbigint)) - type: bigint - expr: ((- cdouble) + cbigint) - type: double - expr: ((- 1.389) / ctinyint) - type: double - expr: (cbigint % cdouble) - type: double - expr: (- csmallint) - type: smallint - expr: (csmallint + (cint + csmallint)) - type: int + expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + csmallint) (type: int), (cbigint - ctinyint) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - ctinyint) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), ((cint + csmallint) * (- cbigint)) (type: bigint), ((- cdouble) + cbigint) (type: double), ((- 1.389) / ctinyint) (type: double), (cbigint % cdouble) (type: double), (- csmallint) (type: smallint), (csmallint + (cint + csmallint)) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Vectorized execution: true + Statistics: Num rows: 1195 Data size: 334668 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col8 - type: boolean - expr: _col1 - type: string - expr: _col3 - type: timestamp - expr: _col5 - type: float - expr: _col6 - type: bigint - expr: _col1 - type: string - expr: _col4 - type: double - expr: _col0 - type: int - expr: _col7 - type: smallint - expr: _col4 - type: double + key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col1 (type: string), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col4 (type: double) sort order: ++++++++++ - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: boolean - expr: _col3 - type: timestamp - expr: _col4 - type: double - expr: _col5 - type: float - expr: _col6 - type: bigint - expr: _col7 - type: smallint - expr: _col8 - type: boolean - expr: _col9 - type: int - expr: _col10 - type: bigint - expr: _col11 - type: bigint - expr: _col12 - type: float - expr: _col13 - type: bigint - expr: _col14 - type: double - expr: _col15 - type: double - expr: _col16 - type: bigint - expr: _col17 - type: double - expr: _col18 - type: double - expr: _col19 - type: double - expr: _col20 - type: smallint - expr: _col21 - type: int - Vectorized execution: true + Statistics: Num rows: 1195 Data size: 334668 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: boolean), _col3 (type: timestamp), _col4 (type: double), _col5 (type: float), _col6 (type: bigint), _col7 (type: smallint), _col8 (type: boolean), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: smallint), _col21 (type: int) + Execution mode: vectorized Reduce Operator Tree: Extract + Statistics: Num rows: 1195 Data size: 334668 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1195 Data size: 334668 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3798,9 +3234,6 @@ WHERE (((-1.389 >= cint) AND (10.175 > cbigint))) ORDER BY csmallint, cstring2, cdouble POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ctimestamp1)) (TOK_SELEXPR (TOK_TABLE_OR_COL cstring2)) (TOK_SELEXPR (TOK_TABLE_OR_COL cdouble)) (TOK_SELEXPR (TOK_TABLE_OR_COL cfloat)) (TOK_SELEXPR (TOK_TABLE_OR_COL cbigint)) (TOK_SELEXPR (TOK_TABLE_OR_COL csmallint)) (TOK_SELEXPR (/ (TOK_TABLE_OR_COL cbigint) 3569)) (TOK_SELEXPR (- (- 257) (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (* (- 6432) (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (* (TOK_TABLE_OR_COL cdouble) 10.175)) (TOK_SELEXPR (/ (* (- 6432) (TOK_TABLE_OR_COL cfloat)) (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (% (TOK_TABLE_OR_COL cint) (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (* (TOK_TABLE_OR_COL cdouble) (- (TOK_TABLE_OR_COL cdouble))))) (TOK_WHERE (OR (OR (AND (>= (- 1.389) (TOK_TABLE_OR_COL cint)) (AND (< (TOK_TABLE_OR_COL csmallint) (TOK_TABLE_OR_COL ctinyint)) (> (- 6432) (TOK_TABLE_OR_COL csmallint)))) (AND (>= (TOK_TABLE_OR_COL cdouble) (TOK_TABLE_OR_COL cfloat)) (<= (TOK_TABLE_OR_COL cstring2) 'a'))) (AND (LIKE (TOK_TABLE_OR_COL cstring1) 'ss%') (> 10.175 (TOK_TABLE_OR_COL cbigint))))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL csmallint)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cstring2)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cdouble))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -3808,97 +3241,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 1386 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((((- 1.389) >= cint) and ((csmallint < ctinyint) and ((- 6432) > csmallint))) or ((cdouble >= cfloat) and (cstring2 <= 'a'))) or ((cstring1 like 'ss%') and (10.175 > cbigint))) - type: boolean + predicate: (((((- 1.389) >= cint) and ((csmallint < ctinyint) and ((- 6432) > csmallint))) or ((cdouble >= cfloat) and (cstring2 <= 'a'))) or ((cstring1 like 'ss%') and (10.175 > cbigint))) (type: boolean) + Statistics: Num rows: 436 Data size: 118669 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: ctimestamp1 - type: timestamp - expr: cstring2 - type: string - expr: cdouble - type: double - expr: cfloat - type: float - expr: cbigint - type: bigint - expr: csmallint - type: smallint - expr: (cbigint / 3569) - type: decimal(25,6) - expr: ((- 257) - csmallint) - type: int - expr: ((- 6432) * cfloat) - type: double - expr: (- cdouble) - type: double - expr: (cdouble * 10.175) - type: double - expr: (((- 6432) * cfloat) / cfloat) - type: double - expr: (- cfloat) - type: float - expr: (cint % csmallint) - type: int - expr: (- cdouble) - type: double - expr: (cdouble * (- cdouble)) - type: double + expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (cbigint / 3569) (type: decimal(25,6)), ((- 257) - csmallint) (type: int), ((- 6432) * cfloat) (type: double), (- cdouble) (type: double), (cdouble * 10.175) (type: double), (((- 6432) * cfloat) / cfloat) (type: double), (- cfloat) (type: float), (cint % csmallint) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 436 Data size: 118669 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col5 - type: smallint - expr: _col1 - type: string - expr: _col2 - type: double + key expressions: _col5 (type: smallint), _col1 (type: string), _col2 (type: double) sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: timestamp - expr: _col1 - type: string - expr: _col2 - type: double - expr: _col3 - type: float - expr: _col4 - type: bigint - expr: _col5 - type: smallint - expr: _col6 - type: decimal(25,6) - expr: _col7 - type: int - expr: _col8 - type: double - expr: _col9 - type: double - expr: _col10 - type: double - expr: _col11 - type: double - expr: _col12 - type: float - expr: _col13 - type: int - expr: _col14 - type: double - expr: _col15 - type: double + Statistics: Num rows: 436 Data size: 118669 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: float), _col4 (type: bigint), _col5 (type: smallint), _col6 (type: decimal(25,6)), _col7 (type: int), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: int), _col14 (type: double), _col15 (type: double) Reduce Operator Tree: Extract + Statistics: Num rows: 436 Data size: 118669 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 436 Data size: 118669 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4682,9 +4046,6 @@ WHERE (((csmallint >= -257)) GROUP BY csmallint ORDER BY csmallint POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL csmallint)) (TOK_SELEXPR (% (TOK_TABLE_OR_COL csmallint) (- 75))) (TOK_SELEXPR (TOK_FUNCTION STDDEV_SAMP (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (/ (- 1.389) (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (/ (% (TOK_TABLE_OR_COL csmallint) (- 75)) (TOK_FUNCTION SUM (TOK_TABLE_OR_COL cbigint)))) (TOK_SELEXPR (- (% (TOK_TABLE_OR_COL csmallint) (- 75)))) (TOK_SELEXPR (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (- (- (% (TOK_TABLE_OR_COL csmallint) (- 75))))) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT)) (TOK_SELEXPR (- (TOK_FUNCTIONSTAR COUNT) (- 89010)))) (TOK_WHERE (AND (>= (TOK_TABLE_OR_COL csmallint) (- 257)) (OR (= (- 6432) (TOK_TABLE_OR_COL csmallint)) (AND (>= (TOK_TABLE_OR_COL cint) (TOK_TABLE_OR_COL cdouble)) (<= (TOK_TABLE_OR_COL ctinyint) (TOK_TABLE_OR_COL cint)))))) (TOK_GROUPBY (TOK_TABLE_OR_COL csmallint)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL csmallint))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -4693,97 +4054,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 13472 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((csmallint >= (- 257)) and (((- 6432) = csmallint) or ((cint >= cdouble) and (ctinyint <= cint)))) - type: boolean - Vectorized execution: true + predicate: ((csmallint >= (- 257)) and (((- 6432) = csmallint) or ((cint >= cdouble) and (ctinyint <= cint)))) (type: boolean) + Statistics: Num rows: 2743 Data size: 76808 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: csmallint - type: smallint - expr: cbigint - type: bigint - expr: ctinyint - type: tinyint + expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint) outputColumnNames: csmallint, cbigint, ctinyint - Vectorized execution: true + Statistics: Num rows: 2743 Data size: 76808 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: stddev_samp(csmallint) - expr: sum(cbigint) - expr: var_pop(ctinyint) - expr: count() - bucketGroup: false - keys: - expr: csmallint - type: smallint + aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count() + keys: csmallint (type: smallint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Vectorized execution: true + Statistics: Num rows: 2743 Data size: 76808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: smallint + key expressions: _col0 (type: smallint) sort order: + - Map-reduce partition columns: - expr: _col0 - type: smallint - tag: -1 - value expressions: - expr: _col1 - type: struct - expr: _col2 - type: bigint - expr: _col3 - type: struct - expr: _col4 - type: bigint + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 2743 Data size: 76808 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: struct), _col4 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: - expr: stddev_samp(VALUE._col0) - expr: sum(VALUE._col1) - expr: var_pop(VALUE._col2) - expr: count(VALUE._col3) - bucketGroup: false - keys: - expr: KEY._col0 - type: smallint + aggregations: stddev_samp(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3) + keys: KEY._col0 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1371 Data size: 38389 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: smallint - expr: (_col0 % (- 75)) - type: int - expr: _col1 - type: double - expr: ((- 1.389) / _col0) - type: double - expr: _col2 - type: bigint - expr: ((_col0 % (- 75)) / _col2) - type: decimal(30,20) - expr: (- (_col0 % (- 75))) - type: int - expr: _col3 - type: double - expr: (- (- (_col0 % (- 75)))) - type: int - expr: _col4 - type: bigint - expr: (_col4 - (- 89010)) - type: bigint + expressions: _col0 (type: smallint), (_col0 % (- 75)) (type: int), _col1 (type: double), ((- 1.389) / _col0) (type: double), _col2 (type: bigint), ((_col0 % (- 75)) / _col2) (type: decimal(30,20)), (- (_col0 % (- 75))) (type: int), _col3 (type: double), (- (- (_col0 % (- 75)))) (type: int), _col4 (type: bigint), (_col4 - (- 89010)) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1371 Data size: 38389 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4791,43 +4098,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: smallint + key expressions: _col0 (type: smallint) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: smallint - expr: _col1 - type: int - expr: _col2 - type: double - expr: _col3 - type: double - expr: _col4 - type: bigint - expr: _col5 - type: decimal(30,20) - expr: _col6 - type: int - expr: _col7 - type: double - expr: _col8 - type: int - expr: _col9 - type: bigint - expr: _col10 - type: bigint + Statistics: Num rows: 1371 Data size: 38389 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(30,20)), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint) Reduce Operator Tree: Extract + Statistics: Num rows: 1371 Data size: 38389 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1371 Data size: 38389 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -6396,9 +5679,6 @@ WHERE (((cdouble > 2563.58)) GROUP BY cdouble ORDER BY cdouble POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL cdouble)) (TOK_SELEXPR (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (* 2563.58 (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cdouble)))) (TOK_SELEXPR (- (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cdouble)))) (TOK_SELEXPR (TOK_FUNCTION COUNT (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (+ (* 2563.58 (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cdouble))) (- 5638.15))) (TOK_SELEXPR (* (- (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cdouble))) (+ (* 2563.58 (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cdouble))) (- 5638.15)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (- (TOK_TABLE_OR_COL cdouble) (- (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cdouble))))) (TOK_SELEXPR (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (+ (TOK_TABLE_OR_COL cdouble) (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cdouble)))) (TOK_SELEXPR (* (TOK_TABLE_OR_COL cdouble) 762)) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (% (- 863.257) (* (TOK_TABLE_OR_COL cdouble) 762))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_TABLE_OR_COL cdouble)))) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL cdouble) 2563.58) (OR (OR (AND (>= (TOK_TABLE_OR_COL cbigint) (TOK_TABLE_OR_COL cint)) (AND (< (TOK_TABLE_OR_COL csmallint) (TOK_TABLE_OR_COL cint)) (< (TOK_TABLE_OR_COL cfloat) (- 5638.15)))) (= 2563.58 (TOK_TABLE_OR_COL ctinyint))) (AND (<= (TOK_TABLE_OR_COL cdouble) (TOK_TABLE_OR_COL cbigint)) (> (- 5638.15) (TOK_TABLE_OR_COL cbigint)))))) (TOK_GROUPBY (TOK_TABLE_OR_COL cdouble)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cdouble))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -6407,113 +5687,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 13472 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((cdouble > 2563.58) and ((((cbigint >= cint) and ((csmallint < cint) and (cfloat < (- 5638.15)))) or false) or ((cdouble <= cbigint) and ((- 5638.15) > cbigint)))) - type: boolean - Vectorized execution: true + predicate: ((cdouble > 2563.58) and ((((cbigint >= cint) and ((csmallint < cint) and (cfloat < (- 5638.15)))) or false) or ((cdouble <= cbigint) and ((- 5638.15) > cbigint)))) (type: boolean) + Statistics: Num rows: 2909 Data size: 81456 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: cdouble - type: double - expr: cfloat - type: float + expressions: cdouble (type: double), cfloat (type: float) outputColumnNames: cdouble, cfloat - Vectorized execution: true + Statistics: Num rows: 2909 Data size: 81456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: var_samp(cdouble) - expr: count(cfloat) - expr: sum(cfloat) - expr: var_pop(cdouble) - expr: stddev_pop(cdouble) - expr: sum(cdouble) - bucketGroup: false - keys: - expr: cdouble - type: double + aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble) + keys: cdouble (type: double) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Vectorized execution: true + Statistics: Num rows: 2909 Data size: 81456 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - Map-reduce partition columns: - expr: _col0 - type: double - tag: -1 - value expressions: - expr: _col1 - type: struct - expr: _col2 - type: bigint - expr: _col3 - type: double - expr: _col4 - type: struct - expr: _col5 - type: struct - expr: _col6 - type: double + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 2909 Data size: 81456 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct), _col5 (type: struct), _col6 (type: double) + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: - expr: var_samp(VALUE._col0) - expr: count(VALUE._col1) - expr: sum(VALUE._col2) - expr: var_pop(VALUE._col3) - expr: stddev_pop(VALUE._col4) - expr: sum(VALUE._col5) - bucketGroup: false - keys: - expr: KEY._col0 - type: double + aggregations: var_samp(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), sum(VALUE._col5) + keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1454 Data size: 40713 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: double - expr: (2563.58 * _col1) - type: double - expr: (- _col1) - type: double - expr: _col2 - type: bigint - expr: ((2563.58 * _col1) + (- 5638.15)) - type: double - expr: ((- _col1) * ((2563.58 * _col1) + (- 5638.15))) - type: double - expr: _col3 - type: double - expr: _col4 - type: double - expr: (_col0 - (- _col1)) - type: double - expr: _col5 - type: double - expr: (_col0 + _col1) - type: double - expr: (_col0 * 762) - type: double - expr: _col6 - type: double - expr: ((- 863.257) % (_col0 * 762)) - type: double - expr: _col6 - type: double + expressions: _col0 (type: double), _col1 (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + (- 5638.15)) (type: double), ((- _col1) * ((2563.58 * _col1) + (- 5638.15))) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762) (type: double), _col6 (type: double), ((- 863.257) % (_col0 * 762)) (type: double), _col6 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 1454 Data size: 40713 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -6521,53 +5731,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: double + key expressions: _col0 (type: double) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: double - expr: _col1 - type: double - expr: _col2 - type: double - expr: _col3 - type: double - expr: _col4 - type: bigint - expr: _col5 - type: double - expr: _col6 - type: double - expr: _col7 - type: double - expr: _col8 - type: double - expr: _col9 - type: double - expr: _col10 - type: double - expr: _col11 - type: double - expr: _col12 - type: double - expr: _col13 - type: double - expr: _col14 - type: double - expr: _col15 - type: double + Statistics: Num rows: 1454 Data size: 40713 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double) Reduce Operator Tree: Extract + Statistics: Num rows: 1454 Data size: 40713 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1454 Data size: 40713 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -6753,9 +5929,6 @@ WHERE (((ctimestamp1 != 0)) AND (cfloat < cint)))) GROUP BY ctimestamp1, cstring1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ctimestamp1)) (TOK_SELEXPR (TOK_TABLE_OR_COL cstring1)) (TOK_SELEXPR (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (* (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)) 10.175)) (TOK_SELEXPR (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)))) (TOK_SELEXPR (TOK_FUNCTION AVG (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)))) (TOK_SELEXPR (- (- 26.28) (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)))) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT)) (TOK_SELEXPR (- (TOK_FUNCTIONSTAR COUNT))) (TOK_SELEXPR (* (- (- 26.28) (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))) (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))))) (TOK_SELEXPR (TOK_FUNCTION MIN (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (* (* (- (- 26.28) (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))) (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)))) (- (TOK_FUNCTIONSTAR COUNT)))) (TOK_SELEXPR (- (* (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)) 10.175))) (TOK_SELEXPR (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (+ (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL csmallint)) (* (* (- (- 26.28) (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))) (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)))) (- (TOK_FUNCTIONSTAR COUNT))))) (TOK_SELEXPR (- (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))))) (TOK_SELEXPR (/ (- (TOK_FUNCTIONSTAR COUNT)) (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)))) (TOK_SELEXPR (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (/ 10.175 (TOK_FUNCTION AVG (TOK_TABLE_OR_COL csmallint)))) (TOK_SELEXPR (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (- (+ (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL csmallint)) (* (* (- (- 26.28) (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))) (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)))) (- (TOK_FUNCTIONSTAR COUNT)))) (* (* (- (- 26.28) (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))) (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)))) (- (TOK_FUNCTIONSTAR COUNT))))) (TOK_SELEXPR (- (- (* (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)) 10.175)))) (TOK_SELEXPR (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (* (- (+ (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL csmallint)) (* (* (- (- 26.28) (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))) (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)))) (- (TOK_FUNCTIONSTAR COUNT)))) (* (* (- (- 26.28) (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))) (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)))) (- (TOK_FUNCTIONSTAR COUNT)))) 10.175)) (TOK_SELEXPR (% 10.175 (/ 10.175 (TOK_FUNCTION AVG (TOK_TABLE_OR_COL csmallint))))) (TOK_SELEXPR (- (TOK_FUNCTION MIN (TOK_TABLE_OR_COL ctinyint)))) (TOK_SELEXPR (TOK_FUNCTION MIN (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (- (* (- (- 26.28) (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))) (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)))))) (TOK_SELEXPR (% (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))) (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cfloat)))) (TOK_SELEXPR (/ (- 26.28) (- (TOK_FUNCTION MIN (TOK_TABLE_OR_COL ctinyint))))) (TOK_SELEXPR (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (/ (+ (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL csmallint)) (* (* (- (- 26.28) (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))) (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)))) (- (TOK_FUNCTIONSTAR COUNT)))) (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cfloat)))) (TOK_SELEXPR (- (- (TOK_FUNCTIONSTAR COUNT)))) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT)) (TOK_SELEXPR (% (+ (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL csmallint)) (* (* (- (- 26.28) (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint))) (- (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL cint)))) (- (TOK_FUNCTIONSTAR COUNT)))) (- 26.28)))) (TOK_WHERE (AND (!= (TOK_TABLE_OR_COL ctimestamp1) 0) (OR (OR (OR (OR (AND (AND (!= (- 257) (TOK_TABLE_OR_COL ctinyint)) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL cboolean2))) (AND (RLIKE (TOK_TABLE_OR_COL cstring1) '.*ss') (< (- 10669) (TOK_TABLE_OR_COL ctimestamp1)))) (= (TOK_TABLE_OR_COL ctimestamp2) (- 10669))) (AND (< (TOK_TABLE_OR_COL ctimestamp1) 0) (LIKE (TOK_TABLE_OR_COL cstring2) '%b%'))) (= (TOK_TABLE_OR_COL cdouble) (TOK_TABLE_OR_COL cint))) (AND (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL cboolean1)) (< (TOK_TABLE_OR_COL cfloat) (TOK_TABLE_OR_COL cint)))))) (TOK_GROUPBY (TOK_TABLE_OR_COL ctimestamp1) (TOK_TABLE_OR_COL cstring1)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -6763,205 +5936,44 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 1209 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((ctimestamp1 <> 0) and ((((((((- 257) <> ctinyint) and cboolean2 is not null) and ((cstring1 rlike '.*ss') and ((- 10669) < ctimestamp1))) or (ctimestamp2 = (- 10669))) or ((ctimestamp1 < 0) and (cstring2 like '%b%'))) or (cdouble = cint)) or (cboolean1 is null and (cfloat < cint)))) - type: boolean - Vectorized execution: true + predicate: ((ctimestamp1 <> 0) and ((((((((- 257) <> ctinyint) and cboolean2 is not null) and ((cstring1 rlike '.*ss') and ((- 10669) < ctimestamp1))) or (ctimestamp2 = (- 10669))) or ((ctimestamp1 < 0) and (cstring2 like '%b%'))) or (cdouble = cint)) or (cboolean1 is null and (cfloat < cint)))) (type: boolean) + Statistics: Num rows: 1209 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: ctimestamp1 - type: timestamp - expr: cstring1 - type: string - expr: cint - type: int - expr: csmallint - type: smallint - expr: ctinyint - type: tinyint - expr: cfloat - type: float - expr: cdouble - type: double + expressions: ctimestamp1 (type: timestamp), cstring1 (type: string), cint (type: int), csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cdouble (type: double) outputColumnNames: ctimestamp1, cstring1, cint, csmallint, ctinyint, cfloat, cdouble - Vectorized execution: true + Statistics: Num rows: 1209 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: stddev_pop(cint) - expr: avg(csmallint) - expr: count() - expr: min(ctinyint) - expr: var_samp(csmallint) - expr: var_pop(cfloat) - expr: avg(cint) - expr: var_samp(cfloat) - expr: avg(cfloat) - expr: min(cdouble) - expr: var_pop(csmallint) - expr: stddev_pop(ctinyint) - expr: sum(cint) - bucketGroup: false - keys: - expr: ctimestamp1 - type: timestamp - expr: cstring1 - type: string + aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint) + keys: ctimestamp1 (type: timestamp), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Vectorized execution: true + Statistics: Num rows: 1209 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: timestamp - expr: _col1 - type: string + key expressions: _col0 (type: timestamp), _col1 (type: string) sort order: ++ - Map-reduce partition columns: - expr: _col0 - type: timestamp - expr: _col1 - type: string - tag: -1 - value expressions: - expr: _col2 - type: struct - expr: _col3 - type: struct - expr: _col4 - type: bigint - expr: _col5 - type: tinyint - expr: _col6 - type: struct - expr: _col7 - type: struct - expr: _col8 - type: struct - expr: _col9 - type: struct - expr: _col10 - type: struct - expr: _col11 - type: double - expr: _col12 - type: struct - expr: _col13 - type: struct - expr: _col14 - type: bigint + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: string) + Statistics: Num rows: 1209 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: bigint), _col5 (type: tinyint), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: double), _col12 (type: struct), _col13 (type: struct), _col14 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: - expr: stddev_pop(VALUE._col0) - expr: avg(VALUE._col1) - expr: count(VALUE._col2) - expr: min(VALUE._col3) - expr: var_samp(VALUE._col4) - expr: var_pop(VALUE._col5) - expr: avg(VALUE._col6) - expr: var_samp(VALUE._col7) - expr: avg(VALUE._col8) - expr: min(VALUE._col9) - expr: var_pop(VALUE._col10) - expr: stddev_pop(VALUE._col11) - expr: sum(VALUE._col12) - bucketGroup: false - keys: - expr: KEY._col0 - type: timestamp - expr: KEY._col1 - type: string + aggregations: stddev_pop(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), min(VALUE._col3), var_samp(VALUE._col4), var_pop(VALUE._col5), avg(VALUE._col6), var_samp(VALUE._col7), avg(VALUE._col8), min(VALUE._col9), var_pop(VALUE._col10), stddev_pop(VALUE._col11), sum(VALUE._col12) + keys: KEY._col0 (type: timestamp), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 604 Data size: 188462 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: timestamp - expr: _col1 - type: string - expr: _col2 - type: double - expr: (_col2 * 10.175) - type: double - expr: (- _col2) - type: double - expr: _col3 - type: double - expr: (- _col2) - type: double - expr: ((- 26.28) - _col2) - type: double - expr: _col4 - type: bigint - expr: (- _col4) - type: bigint - expr: (((- 26.28) - _col2) * (- _col2)) - type: double - expr: _col5 - type: tinyint - expr: ((((- 26.28) - _col2) * (- _col2)) * (- _col4)) - type: double - expr: (- (_col2 * 10.175)) - type: double - expr: _col6 - type: double - expr: (_col6 + ((((- 26.28) - _col2) * (- _col2)) * (- _col4))) - type: double - expr: (- (- _col2)) - type: double - expr: ((- _col4) / _col2) - type: double - expr: _col7 - type: double - expr: (10.175 / _col3) - type: double - expr: _col8 - type: double - expr: _col9 - type: double - expr: ((_col6 + ((((- 26.28) - _col2) * (- _col2)) * (- _col4))) - ((((- 26.28) - _col2) * (- _col2)) * (- _col4))) - type: double - expr: (- (- (_col2 * 10.175))) - type: double - expr: _col10 - type: double - expr: (((_col6 + ((((- 26.28) - _col2) * (- _col2)) * (- _col4))) - ((((- 26.28) - _col2) * (- _col2)) * (- _col4))) * 10.175) - type: double - expr: (10.175 % (10.175 / _col3)) - type: double - expr: (- _col5) - type: tinyint - expr: _col11 - type: double - expr: _col12 - type: double - expr: (- (((- 26.28) - _col2) * (- _col2))) - type: double - expr: ((- _col2) % _col10) - type: double - expr: ((- 26.28) / (- _col5)) - type: double - expr: _col13 - type: double - expr: _col14 - type: bigint - expr: ((_col6 + ((((- 26.28) - _col2) * (- _col2)) * (- _col4))) / _col7) - type: double - expr: (- (- _col4)) - type: bigint - expr: _col4 - type: bigint - expr: ((_col6 + ((((- 26.28) - _col2) * (- _col2)) * (- _col4))) % (- 26.28)) - type: double + expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), (_col2 * 10.175) (type: double), (- _col2) (type: double), _col3 (type: double), (- _col2) (type: double), ((- 26.28) - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint), (((- 26.28) - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), ((((- 26.28) - _col2) * (- _col2)) * (- _col4)) (type: double), (- (_col2 * 10.175)) (type: double), _col6 (type: double), (_col6 + ((((- 26.28) - _col2) * (- _col2)) * (- _col4))) (type: double), (- (- _col2)) (type: double), ((- _col4) / _col2) (type: double), _col7 (type: double), (10.175 / _col3) (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + ((((- 26.28) - _col2) * (- _col2)) * (- _col4))) - ((((- 26.28) - _col2) * (- _col2)) * (- _col4))) (type: double), (- (- (_col2 * 10.175))) (type: double), _col10 (type: double), (((_col6 + ((((- 26.28) - _col2) * (- _col2)) * (- _col4))) - ((((- 26.28) - _col2) * (- _col2)) * (- _col4))) * 10.175) (type: double), (10.175 % (10.175 / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (- (((- 26.28) - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), ((- 26.28) / (- _col5)) (type: double), _col13 (type: double), _col14 (type: bigint), ((_col6 + ((((- 26.28) - _col2) * (- _col2)) * (- _col4))) / _col7) (type: double), (- (- _col4)) (type: bigint), _col4 (type: bigint), ((_col6 + ((((- 26.28) - _col2) * (- _col2)) * (- _col4))) % (- 26.28)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 + Statistics: Num rows: 604 Data size: 188462 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 604 Data size: 188462 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -7410,9 +6422,6 @@ WHERE (((cboolean1 IS NOT NULL)) GROUP BY cboolean1 ORDER BY cboolean1 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL cboolean1)) (TOK_SELEXPR (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (- (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cfloat)))) (TOK_SELEXPR (/ (- 26.28) (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cfloat)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (- (TOK_FUNCTION SUM (TOK_TABLE_OR_COL cbigint)) 10.175)) (TOK_SELEXPR (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (% (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cint)) (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cfloat)))) (TOK_SELEXPR (+ 10.175 (- (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cfloat))))) (TOK_SELEXPR (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (+ (- (TOK_FUNCTION SUM (TOK_TABLE_OR_COL cbigint)) 10.175) (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cint)))) (TOK_SELEXPR (TOK_FUNCTION MIN (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (- (+ 10.175 (- (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cfloat)))))) (TOK_SELEXPR (/ 79.553 (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cbigint)))) (TOK_SELEXPR (% (TOK_FUNCTION VAR_SAMP (TOK_TABLE_OR_COL cint)) (/ 79.553 (TOK_FUNCTION VAR_POP (TOK_TABLE_OR_COL cbigint))))) (TOK_SELEXPR (- (+ 10.175 (- (TOK_FUNCTION MAX (TOK_TABLE_OR_COL cfloat)))))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (TOK_FUNCTION STDDEV_SAMP (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (* (- 1.389) (TOK_FUNCTION MIN (TOK_TABLE_OR_COL cbigint)))) (TOK_SELEXPR (- (TOK_FUNCTION SUM (TOK_TABLE_OR_COL cint)) (* (- 1.389) (TOK_FUNCTION MIN (TOK_TABLE_OR_COL cbigint))))) (TOK_SELEXPR (TOK_FUNCTION STDDEV_POP (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (- (- (TOK_FUNCTION SUM (TOK_TABLE_OR_COL cint)) (* (- 1.389) (TOK_FUNCTION MIN (TOK_TABLE_OR_COL cbigint)))))) (TOK_SELEXPR (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (- (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint)))) (TOK_SELEXPR (* (TOK_FUNCTION AVG (TOK_TABLE_OR_COL cint)) (TOK_FUNCTION SUM (TOK_TABLE_OR_COL cint))))) (TOK_WHERE (AND (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL cboolean1)) (OR (OR (OR (AND (< (TOK_TABLE_OR_COL cdouble) (TOK_TABLE_OR_COL csmallint)) (AND (= (TOK_TABLE_OR_COL cboolean2) (TOK_TABLE_OR_COL cboolean1)) (<= (TOK_TABLE_OR_COL cbigint) (- 863.257)))) (AND (>= (TOK_TABLE_OR_COL cint) (- 257)) (AND (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL cstring1)) (>= (TOK_TABLE_OR_COL cboolean1) 1)))) (RLIKE (TOK_TABLE_OR_COL cstring2) 'b')) (AND (>= (TOK_TABLE_OR_COL csmallint) (TOK_TABLE_OR_COL ctinyint)) (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL ctimestamp2)))))) (TOK_GROUPBY (TOK_TABLE_OR_COL cboolean1)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cboolean1))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -7421,159 +6430,43 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 1347 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (cboolean1 is not null and (((((cdouble < csmallint) and ((cboolean2 = cboolean1) and (cbigint <= (- 863.257)))) or ((cint >= (- 257)) and (cstring1 is not null and (cboolean1 >= 1)))) or (cstring2 rlike 'b')) or ((csmallint >= ctinyint) and ctimestamp2 is null))) - type: boolean - Vectorized execution: true + predicate: (cboolean1 is not null and (((((cdouble < csmallint) and ((cboolean2 = cboolean1) and (cbigint <= (- 863.257)))) or ((cint >= (- 257)) and (cstring1 is not null and (cboolean1 >= 1)))) or (cstring2 rlike 'b')) or ((csmallint >= ctinyint) and ctimestamp2 is null))) (type: boolean) + Statistics: Num rows: 523 Data size: 146469 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: cboolean1 - type: boolean - expr: cfloat - type: float - expr: cbigint - type: bigint - expr: cint - type: int - expr: cdouble - type: double - expr: ctinyint - type: tinyint - expr: csmallint - type: smallint + expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint) outputColumnNames: cboolean1, cfloat, cbigint, cint, cdouble, ctinyint, csmallint - Vectorized execution: true + Statistics: Num rows: 523 Data size: 146469 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: max(cfloat) - expr: sum(cbigint) - expr: var_samp(cint) - expr: avg(cdouble) - expr: min(cbigint) - expr: var_pop(cbigint) - expr: sum(cint) - expr: stddev_samp(ctinyint) - expr: stddev_pop(csmallint) - expr: avg(cint) - bucketGroup: false - keys: - expr: cboolean1 - type: boolean + aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint) + keys: cboolean1 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Vectorized execution: true + Statistics: Num rows: 523 Data size: 146469 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: _col0 - type: boolean + key expressions: _col0 (type: boolean) sort order: + - Map-reduce partition columns: - expr: _col0 - type: boolean - tag: -1 - value expressions: - expr: _col1 - type: float - expr: _col2 - type: bigint - expr: _col3 - type: struct - expr: _col4 - type: struct - expr: _col5 - type: bigint - expr: _col6 - type: struct - expr: _col7 - type: bigint - expr: _col8 - type: struct - expr: _col9 - type: struct - expr: _col10 - type: struct + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 523 Data size: 146469 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: struct), _col4 (type: struct), _col5 (type: bigint), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: - expr: max(VALUE._col0) - expr: sum(VALUE._col1) - expr: var_samp(VALUE._col2) - expr: avg(VALUE._col3) - expr: min(VALUE._col4) - expr: var_pop(VALUE._col5) - expr: sum(VALUE._col6) - expr: stddev_samp(VALUE._col7) - expr: stddev_pop(VALUE._col8) - expr: avg(VALUE._col9) - bucketGroup: false - keys: - expr: KEY._col0 - type: boolean + aggregations: max(VALUE._col0), sum(VALUE._col1), var_samp(VALUE._col2), avg(VALUE._col3), min(VALUE._col4), var_pop(VALUE._col5), sum(VALUE._col6), stddev_samp(VALUE._col7), stddev_pop(VALUE._col8), avg(VALUE._col9) + keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 261 Data size: 73094 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: boolean - expr: _col1 - type: float - expr: (- _col1) - type: float - expr: ((- 26.28) / _col1) - type: double - expr: _col2 - type: bigint - expr: (_col2 - 10.175) - type: double - expr: _col3 - type: double - expr: (_col3 % _col1) - type: double - expr: (10.175 + (- _col1)) - type: double - expr: _col4 - type: double - expr: ((_col2 - 10.175) + _col3) - type: double - expr: _col5 - type: bigint - expr: _col6 - type: double - expr: (- (10.175 + (- _col1))) - type: double - expr: (79.553 / _col6) - type: double - expr: (_col3 % (79.553 / _col6)) - type: double - expr: (- (10.175 + (- _col1))) - type: double - expr: _col7 - type: bigint - expr: _col8 - type: double - expr: ((- 1.389) * _col5) - type: double - expr: (_col7 - ((- 1.389) * _col5)) - type: double - expr: _col9 - type: double - expr: (- (_col7 - ((- 1.389) * _col5))) - type: double - expr: _col10 - type: double - expr: (- _col10) - type: double - expr: (_col10 * _col7) - type: double + expressions: _col0 (type: boolean), _col1 (type: float), (- _col1) (type: float), ((- 26.28) / _col1) (type: double), _col2 (type: bigint), (_col2 - 10.175) (type: double), _col3 (type: double), (_col3 % _col1) (type: double), (10.175 + (- _col1)) (type: double), _col4 (type: double), ((_col2 - 10.175) + _col3) (type: double), _col5 (type: bigint), _col6 (type: double), (- (10.175 + (- _col1))) (type: double), (79.553 / _col6) (type: double), (_col3 % (79.553 / _col6)) (type: double), (- (10.175 + (- _col1))) (type: double), _col7 (type: bigint), _col8 (type: double), ((- 1.389) * _col5) (type: double), (_col7 - ((- 1.389) * _col5)) (type: double), _col9 (type: double), (- (_col7 - ((- 1.389) * _col5))) (type: double), _col10 (type: double), (- _col10) (type: double), (_col10 * _col7) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 261 Data size: 73094 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -7581,73 +6474,19 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator - key expressions: - expr: _col0 - type: boolean + key expressions: _col0 (type: boolean) sort order: + - tag: -1 - value expressions: - expr: _col0 - type: boolean - expr: _col1 - type: float - expr: _col2 - type: float - expr: _col3 - type: double - expr: _col4 - type: bigint - expr: _col5 - type: double - expr: _col6 - type: double - expr: _col7 - type: double - expr: _col8 - type: double - expr: _col9 - type: double - expr: _col10 - type: double - expr: _col11 - type: bigint - expr: _col12 - type: double - expr: _col13 - type: double - expr: _col14 - type: double - expr: _col15 - type: double - expr: _col16 - type: double - expr: _col17 - type: bigint - expr: _col18 - type: double - expr: _col19 - type: double - expr: _col20 - type: double - expr: _col21 - type: double - expr: _col22 - type: double - expr: _col23 - type: double - expr: _col24 - type: double - expr: _col25 - type: double + Statistics: Num rows: 261 Data size: 73094 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: boolean), _col1 (type: float), _col2 (type: float), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double) Reduce Operator Tree: Extract + Statistics: Num rows: 261 Data size: 73094 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 261 Data size: 73094 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out index ba4dea5..fb8fe02 100644 --- ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out @@ -108,9 +108,6 @@ POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSche POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME vsmb_bucket_1) a) (TOK_TABREF (TOK_TABNAME vsmb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -118,43 +115,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - b + Map Operator Tree: TableScan alias: b + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 1 - Vectorized execution: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Vectorized execution: true File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Vectorized execution: true + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -196,9 +180,6 @@ POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSche POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME vsmb_bucket_1) a) (TOK_TABREF (TOK_TABNAME vsmb_bucket_RC) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -206,43 +187,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 - Vectorized execution: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Vectorized execution: true File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Vectorized execution: true + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -294,9 +262,6 @@ POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSche POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME vsmb_bucket_1) a) (TOK_TABREF (TOK_TABNAME vsmb_bucket_TXT) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -304,43 +269,30 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - a + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {key} {value} - handleSkewJoin: false keys: - 0 [Column[key]] - 1 [Column[key]] + 0 key (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 - Vectorized execution: true Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Vectorized execution: true File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Vectorized execution: true + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_case.q.out ql/src/test/results/clientpositive/vectorized_case.q.out index 9409f46..e013f49 100644 --- ql/src/test/results/clientpositive/vectorized_case.q.out +++ ql/src/test/results/clientpositive/vectorized_case.q.out @@ -34,9 +34,6 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL csmallint)) (TOK_SELEXPR (TOK_FUNCTION when (= (TOK_TABLE_OR_COL csmallint) 418) "a" (= (TOK_TABLE_OR_COL csmallint) 12205) "b" "c")) (TOK_SELEXPR (TOK_FUNCTION case (TOK_TABLE_OR_COL csmallint) 418 "a" 12205 "b" "c"))) (TOK_WHERE (or (or (= (TOK_TABLE_OR_COL csmallint) 418) (= (TOK_TABLE_OR_COL csmallint) 12205)) (= (TOK_TABLE_OR_COL csmallint) 10583))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -44,33 +41,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((csmallint = 418) or (csmallint = 12205)) or (csmallint = 10583)) - type: boolean - Vectorized execution: true + predicate: (((csmallint = 418) or (csmallint = 12205)) or (csmallint = 10583)) (type: boolean) + Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: csmallint - type: smallint - expr: CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END - type: string - expr: CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END - type: string + expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 - Vectorized execution: true + Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Vectorized execution: true + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_casts.q.out ql/src/test/results/clientpositive/vectorized_casts.q.out index 723ef14..2027f7a 100644 --- ql/src/test/results/clientpositive/vectorized_casts.q.out +++ ql/src/test/results/clientpositive/vectorized_casts.q.out @@ -146,9 +146,6 @@ from alltypesorc -- limit output to a reasonably small number of rows where cbigint % 250 = 0 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_BOOLEAN (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (TOK_FUNCTION TOK_BOOLEAN (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (TOK_FUNCTION TOK_BOOLEAN (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (TOK_FUNCTION TOK_BOOLEAN (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (TOK_FUNCTION TOK_BOOLEAN (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (TOK_FUNCTION TOK_BOOLEAN (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION TOK_BOOLEAN (TOK_TABLE_OR_COL cboolean1))) (TOK_SELEXPR (TOK_FUNCTION TOK_BOOLEAN (* (TOK_TABLE_OR_COL cbigint) 0))) (TOK_SELEXPR (TOK_FUNCTION TOK_BOOLEAN (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION TOK_BOOLEAN (TOK_TABLE_OR_COL cstring1))) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL cboolean1))) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL cstring1))) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_FUNCTION substr (TOK_TABLE_OR_COL cstring1) 1 1))) (TOK_SELEXPR (TOK_FUNCTION TOK_TINYINT (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (TOK_FUNCTION TOK_SMALLINT (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (TOK_FUNCTION TOK_BIGINT (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL cboolean1))) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL cstring1))) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_FUNCTION substr (TOK_TABLE_OR_COL cstring1) 1 1))) (TOK_SELEXPR (TOK_FUNCTION TOK_FLOAT (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (TOK_FUNCTION TOK_FLOAT (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP (TOK_TABLE_OR_COL cboolean1))) (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP (* (TOK_TABLE_OR_COL cbigint) 0))) (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP (TOK_TABLE_OR_COL cstring1))) (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP (TOK_FUNCTION substr (TOK_TABLE_OR_COL cstring1) 1 1))) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_TABLE_OR_COL cboolean1))) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (* (TOK_TABLE_OR_COL cbigint) 0))) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_TABLE_OR_COL cstring1))) (TOK_SELEXPR (TOK_FUNCTION TOK_FLOAT (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL cfloat)))) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (* (TOK_TABLE_OR_COL cint) 2))) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_FUNCTION sin (TOK_TABLE_OR_COL cfloat)))) (TOK_SELEXPR (+ (TOK_FUNCTION TOK_FLOAT (TOK_TABLE_OR_COL cint)) (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL cboolean1))))) (TOK_WHERE (= (% (TOK_TABLE_OR_COL cbigint) 250) 0)))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -156,147 +153,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 2143 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((cbigint % 250) = 0) - type: boolean - Vectorized execution: true + predicate: ((cbigint % 250) = 0) (type: boolean) + Statistics: Num rows: 1071 Data size: 188530 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: UDFToBoolean(ctinyint) - type: boolean - expr: UDFToBoolean(csmallint) - type: boolean - expr: UDFToBoolean(cint) - type: boolean - expr: UDFToBoolean(cbigint) - type: boolean - expr: UDFToBoolean(cfloat) - type: boolean - expr: UDFToBoolean(cdouble) - type: boolean - expr: cboolean1 - type: boolean - expr: UDFToBoolean((cbigint * 0)) - type: boolean - expr: UDFToBoolean(ctimestamp1) - type: boolean - expr: UDFToBoolean(cstring1) - type: boolean - expr: UDFToInteger(ctinyint) - type: int - expr: UDFToInteger(csmallint) - type: int - expr: cint - type: int - expr: UDFToInteger(cbigint) - type: int - expr: UDFToInteger(cfloat) - type: int - expr: UDFToInteger(cdouble) - type: int - expr: UDFToInteger(cboolean1) - type: int - expr: UDFToInteger(ctimestamp1) - type: int - expr: UDFToInteger(cstring1) - type: int - expr: UDFToInteger(substr(cstring1, 1, 1)) - type: int - expr: UDFToByte(cfloat) - type: tinyint - expr: UDFToShort(cfloat) - type: smallint - expr: UDFToLong(cfloat) - type: bigint - expr: UDFToDouble(ctinyint) - type: double - expr: UDFToDouble(csmallint) - type: double - expr: UDFToDouble(cint) - type: double - expr: UDFToDouble(cbigint) - type: double - expr: UDFToDouble(cfloat) - type: double - expr: cdouble - type: double - expr: UDFToDouble(cboolean1) - type: double - expr: UDFToDouble(ctimestamp1) - type: double - expr: UDFToDouble(cstring1) - type: double - expr: UDFToDouble(substr(cstring1, 1, 1)) - type: double - expr: UDFToFloat(cint) - type: float - expr: UDFToFloat(cdouble) - type: float - expr: CAST( ctinyint AS TIMESTAMP) - type: timestamp - expr: CAST( csmallint AS TIMESTAMP) - type: timestamp - expr: CAST( cint AS TIMESTAMP) - type: timestamp - expr: CAST( cbigint AS TIMESTAMP) - type: timestamp - expr: CAST( cfloat AS TIMESTAMP) - type: timestamp - expr: CAST( cdouble AS TIMESTAMP) - type: timestamp - expr: CAST( cboolean1 AS TIMESTAMP) - type: timestamp - expr: CAST( (cbigint * 0) AS TIMESTAMP) - type: timestamp - expr: ctimestamp1 - type: timestamp - expr: CAST( cstring1 AS TIMESTAMP) - type: timestamp - expr: CAST( substr(cstring1, 1, 1) AS TIMESTAMP) - type: timestamp - expr: UDFToString(ctinyint) - type: string - expr: UDFToString(csmallint) - type: string - expr: UDFToString(cint) - type: string - expr: UDFToString(cbigint) - type: string - expr: UDFToString(cfloat) - type: string - expr: UDFToString(cdouble) - type: string - expr: UDFToString(cboolean1) - type: string - expr: UDFToString((cbigint * 0)) - type: string - expr: UDFToString(ctimestamp1) - type: string - expr: cstring1 - type: string - expr: UDFToFloat(UDFToInteger(cfloat)) - type: float - expr: UDFToDouble((cint * 2)) - type: double - expr: UDFToString(sin(cfloat)) - type: string - expr: (UDFToFloat(cint) + UDFToDouble(cboolean1)) - type: double + expressions: UDFToBoolean(ctinyint) (type: boolean), UDFToBoolean(csmallint) (type: boolean), UDFToBoolean(cint) (type: boolean), UDFToBoolean(cbigint) (type: boolean), UDFToBoolean(cfloat) (type: boolean), UDFToBoolean(cdouble) (type: boolean), cboolean1 (type: boolean), UDFToBoolean((cbigint * 0)) (type: boolean), UDFToBoolean(ctimestamp1) (type: boolean), UDFToBoolean(cstring1) (type: boolean), UDFToInteger(ctinyint) (type: int), UDFToInteger(csmallint) (type: int), cint (type: int), UDFToInteger(cbigint) (type: int), UDFToInteger(cfloat) (type: int), UDFToInteger(cdouble) (type: int), UDFToInteger(cboolean1) (type: int), UDFToInteger(ctimestamp1) (type: int), UDFToInteger(cstring1) (type: int), UDFToInteger(substr(cstring1, 1, 1)) (type: int), UDFToByte(cfloat) (type: tinyint), UDFToShort(cfloat) (type: smallint), UDFToLong(cfloat) (type: bigint), UDFToDouble(ctinyint) (type: double), UDFToDouble(csmallint) (type: double), UDFToDouble(cint) (type: double), UDFToDouble(cbigint) (type: double), UDFToDouble(cfloat) (type: double), cdouble (type: double), UDFToDouble(cboolean1) (type: double), UDFToDouble(ctimestamp1) (type: double), UDFToDouble(cstring1) (type: double), UDFToDouble(substr(cstring1, 1, 1)) (type: double), UDFToFloat(cint) (type: float), UDFToFloat(cdouble) (type: float), CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp), UDFToString(ctinyint) (type: string), UDFToString(csmallint) (type: string), UDFToString(cint) (type: string), UDFToString(cbigint) (type: string), UDFToString(cfloat) (type: string), UDFToString(cdouble) (type: string), UDFToString(cboolean1) (type: string), UDFToString((cbigint * 0)) (type: string), UDFToString(ctimestamp1) (type: string), cstring1 (type: string), UDFToFloat(UDFToInteger(cfloat)) (type: float), UDFToDouble((cint * 2)) (type: double), UDFToString(sin(cfloat)) (type: string), (UDFToFloat(cint) + UDFToDouble(cboolean1)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59 - Vectorized execution: true + Statistics: Num rows: 1071 Data size: 188530 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1071 Data size: 188530 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Vectorized execution: true + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_context.q.out ql/src/test/results/clientpositive/vectorized_context.q.out index 5cd9f61..a13f861 100644 --- ql/src/test/results/clientpositive/vectorized_context.q.out +++ ql/src/test/results/clientpositive/vectorized_context.q.out @@ -107,9 +107,6 @@ POSTHOOK: Lineage: store.s_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ] POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME store_sales)) (TOK_TABREF (TOK_TABNAME store)) (= (. (TOK_TABLE_OR_COL store_sales) ss_store_sk) (. (TOK_TABLE_OR_COL store) s_store_sk))) (TOK_TABREF (TOK_TABNAME household_demographics)) (= (. (TOK_TABLE_OR_COL store_sales) ss_hdemo_sk) (. (TOK_TABLE_OR_COL household_demographics) hd_demo_sk)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL store) s_city)) (TOK_SELEXPR (TOK_TABLE_OR_COL ss_net_profit))) (TOK_LIMIT 100))) - STAGE DEPENDENCIES: Stage-6 is a root stage Stage-4 depends on stages: Stage-6 @@ -129,80 +126,71 @@ STAGE PLANS: household_demographics TableScan alias: household_demographics + Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 {_col6} {_col2} 1 - handleSkewJoin: false keys: - 0 [Column[_col1]] - 1 [Column[hd_demo_sk]] - Position of Big Table: 0 + 0 _col1 (type: int) + 1 hd_demo_sk (type: int) store_sales TableScan alias: store_sales + Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 {ss_hdemo_sk} {ss_net_profit} 1 {s_city} - handleSkewJoin: false keys: - 0 [Column[ss_store_sk]] - 1 [Column[s_store_sk]] - Position of Big Table: 1 + 0 ss_store_sk (type: int) + 1 s_store_sk (type: int) Stage: Stage-4 Map Reduce - Alias -> Map Operator Tree: - store + Map Operator Tree: TableScan alias: store + Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {ss_hdemo_sk} {ss_net_profit} 1 {s_city} - handleSkewJoin: false keys: - 0 [Column[ss_store_sk]] - 1 [Column[s_store_sk]] + 0 ss_store_sk (type: int) + 1 s_store_sk (type: int) outputColumnNames: _col1, _col2, _col6 - Position of Big Table: 1 - Vectorized execution: true + Statistics: Num rows: 6682 Data size: 80009 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col6} {_col2} 1 - handleSkewJoin: false keys: - 0 [Column[_col1]] - 1 [Column[hd_demo_sk]] + 0 _col1 (type: int) + 1 hd_demo_sk (type: int) outputColumnNames: _col1, _col6 - Position of Big Table: 0 - Vectorized execution: true + Statistics: Num rows: 7350 Data size: 88009 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col1 - type: string - expr: _col6 - type: double + expressions: _col1 (type: string), _col6 (type: double) outputColumnNames: _col0, _col1 - Vectorized execution: true + Statistics: Num rows: 7350 Data size: 88009 Basic stats: COMPLETE Column stats: NONE Limit - Vectorized execution: true + Number of rows: 100 + Statistics: Num rows: 100 Data size: 1100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 100 Data size: 1100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Vectorized execution: true Local Work: Map Reduce Local Work + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/vectorized_mapjoin.q.out index 13d5489..9590642 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin.q.out @@ -6,9 +6,6 @@ POSTHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG( FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME alltypesorc) t1) (TOK_TABREF (TOK_TABNAME alltypesorc) t2) (= (. (TOK_TABLE_OR_COL t1) cint) (. (TOK_TABLE_OR_COL t2) cint)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL t1) cint))) (TOK_SELEXPR (TOK_FUNCTION MAX (. (TOK_TABLE_OR_COL t2) cint))) (TOK_SELEXPR (TOK_FUNCTION MIN (. (TOK_TABLE_OR_COL t1) cint))) (TOK_SELEXPR (TOK_FUNCTION AVG (+ (. (TOK_TABLE_OR_COL t1) cint) (. (TOK_TABLE_OR_COL t2) cint))))))) - STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 @@ -25,91 +22,61 @@ STAGE PLANS: t1 TableScan alias: t1 + Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 {cint} 1 {cint} - handleSkewJoin: false keys: - 0 [Column[cint]] - 1 [Column[cint]] - Position of Big Table: 1 + 0 cint (type: int) + 1 cint (type: int) Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: - t2 + Map Operator Tree: TableScan alias: t2 + Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {cint} 1 {cint} - handleSkewJoin: false keys: - 0 [Column[cint]] - 1 [Column[cint]] + 0 cint (type: int) + 1 cint (type: int) outputColumnNames: _col2, _col16 - Position of Big Table: 1 - Vectorized execution: true + Statistics: Num rows: 103739 Data size: 414960 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: int - expr: _col16 - type: int + expressions: _col2 (type: int), _col16 (type: int) outputColumnNames: _col2, _col16 - Vectorized execution: true + Statistics: Num rows: 103739 Data size: 414960 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(_col2) - expr: max(_col16) - expr: min(_col2) - expr: avg((_col2 + _col16)) - bucketGroup: false + aggregations: count(_col2), max(_col16), min(_col2), avg((_col2 + _col16)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Vectorized execution: true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: int - expr: _col2 - type: int - expr: _col3 - type: struct + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Local Work: Map Reduce Local Work + Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: max(VALUE._col1) - expr: min(VALUE._col2) - expr: avg(VALUE._col3) - bucketGroup: false + aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: int - expr: _col2 - type: int - expr: _col3 - type: double + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/vectorized_math_funcs.q.out ql/src/test/results/clientpositive/vectorized_math_funcs.q.out index 4da70b0..fbe7805 100644 --- ql/src/test/results/clientpositive/vectorized_math_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_math_funcs.q.out @@ -106,9 +106,6 @@ where cbigint % 500 = 0 -- test use of a math function in the WHERE clause and sin(cfloat) >= -1.0 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL cdouble)) (TOK_SELEXPR (TOK_FUNCTION Round (TOK_TABLE_OR_COL cdouble) 2)) (TOK_SELEXPR (TOK_FUNCTION Floor (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Ceil (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Rand)) (TOK_SELEXPR (TOK_FUNCTION Rand 98007)) (TOK_SELEXPR (TOK_FUNCTION Exp (TOK_FUNCTION ln (TOK_TABLE_OR_COL cdouble)))) (TOK_SELEXPR (TOK_FUNCTION Ln (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Ln (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (TOK_FUNCTION Log10 (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Log2 (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Log2 (- (TOK_TABLE_OR_COL cdouble) 15601.0))) (TOK_SELEXPR (TOK_FUNCTION Log2 (TOK_TABLE_OR_COL cfloat))) (TOK_SELEXPR (TOK_FUNCTION Log2 (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (TOK_FUNCTION Log2 (TOK_TABLE_OR_COL cint))) (TOK_SELEXPR (TOK_FUNCTION Log2 (TOK_TABLE_OR_COL csmallint))) (TOK_SELEXPR (TOK_FUNCTION Log2 (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (TOK_FUNCTION Log 2.0 (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Pow (TOK_FUNCTION log2 (TOK_TABLE_OR_COL cdouble)) 2.0)) (TOK_SELEXPR (TOK_FUNCTION Power (TOK_FUNCTION log2 (TOK_TABLE_OR_COL cdouble)) 2.0)) (TOK_SELEXPR (TOK_FUNCTION Sqrt (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Sqrt (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (TOK_FUNCTION Bin (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (TOK_FUNCTION Hex (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Conv (TOK_TABLE_OR_COL cbigint) 10 16)) (TOK_SELEXPR (TOK_FUNCTION Abs (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Abs (TOK_TABLE_OR_COL ctinyint))) (TOK_SELEXPR (TOK_FUNCTION Pmod (TOK_TABLE_OR_COL cint) 3)) (TOK_SELEXPR (TOK_FUNCTION Sin (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Asin (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Cos (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION ACos (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Atan (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Degrees (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Radians (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Positive (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Positive (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (TOK_FUNCTION Negative (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Sign (TOK_TABLE_OR_COL cdouble))) (TOK_SELEXPR (TOK_FUNCTION Sign (TOK_TABLE_OR_COL cbigint))) (TOK_SELEXPR (TOK_FUNCTION cos (+ (- (TOK_FUNCTION sin (TOK_FUNCTION log (TOK_TABLE_OR_COL cdouble)))) 3.14159)))) (TOK_WHERE (and (= (% (TOK_TABLE_OR_COL cbigint) 500) 0) (>= (TOK_FUNCTION sin (TOK_TABLE_OR_COL cfloat)) (- 1.0)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -116,109 +113,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 11788 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: (((cbigint % 500) = 0) and (sin(cfloat) >= (- 1.0))) - type: boolean - Vectorized execution: true + predicate: (((cbigint % 500) = 0) and (sin(cfloat) >= (- 1.0))) (type: boolean) + Statistics: Num rows: 1964 Data size: 62851 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: cdouble - type: double - expr: round(cdouble, 2) - type: double - expr: floor(cdouble) - type: bigint - expr: ceil(cdouble) - type: bigint - expr: rand() - type: double - expr: rand(98007) - type: double - expr: exp(ln(cdouble)) - type: double - expr: ln(cdouble) - type: double - expr: ln(cfloat) - type: double - expr: log10(cdouble) - type: double - expr: log2(cdouble) - type: double - expr: log2((cdouble - 15601.0)) - type: double - expr: log2(cfloat) - type: double - expr: log2(cbigint) - type: double - expr: log2(cint) - type: double - expr: log2(csmallint) - type: double - expr: log2(ctinyint) - type: double - expr: log(2.0, cdouble) - type: double - expr: power(log2(cdouble), 2.0) - type: double - expr: power(log2(cdouble), 2.0) - type: double - expr: sqrt(cdouble) - type: double - expr: sqrt(cbigint) - type: double - expr: bin(cbigint) - type: string - expr: hex(cdouble) - type: string - expr: conv(cbigint, 10, 16) - type: string - expr: abs(cdouble) - type: double - expr: abs(ctinyint) - type: int - expr: (cint pmod 3) - type: int - expr: sin(cdouble) - type: double - expr: asin(cdouble) - type: double - expr: cos(cdouble) - type: double - expr: acos(cdouble) - type: double - expr: atan(cdouble) - type: double - expr: degrees(cdouble) - type: double - expr: radians(cdouble) - type: double - expr: cdouble - type: double - expr: cbigint - type: bigint - expr: (- cdouble) - type: double - expr: sign(cdouble) - type: double - expr: sign(cbigint) - type: double - expr: cos(((- sin(log(cdouble))) + 3.14159)) - type: double + expressions: cdouble (type: double), round(cdouble, 2) (type: double), floor(cdouble) (type: bigint), ceil(cdouble) (type: bigint), rand() (type: double), rand(98007) (type: double), exp(ln(cdouble)) (type: double), ln(cdouble) (type: double), ln(cfloat) (type: double), log10(cdouble) (type: double), log2(cdouble) (type: double), log2((cdouble - 15601.0)) (type: double), log2(cfloat) (type: double), log2(cbigint) (type: double), log2(cint) (type: double), log2(csmallint) (type: double), log2(ctinyint) (type: double), log(2.0, cdouble) (type: double), power(log2(cdouble), 2.0) (type: double), power(log2(cdouble), 2.0) (type: double), sqrt(cdouble) (type: double), sqrt(cbigint) (type: double), bin(cbigint) (type: string), hex(cdouble) (type: string), conv(cbigint, 10, 16) (type: string), abs(cdouble) (type: double), abs(ctinyint) (type: int), (cint pmod 3) (type: int), sin(cdouble) (type: double), asin(cdouble) (type: double), cos(cdouble) (type: double), acos(cdouble) (type: double), atan(cdouble) (type: double), degrees(cdouble) (type: double), radians(cdouble) (type: double), cdouble (type: double), cbigint (type: bigint), (- cdouble) (type: double), sign(cdouble) (type: double), sign(cbigint) (type: double), cos(((- sin(log(cdouble))) + 3.14159)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40 - Vectorized execution: true + Statistics: Num rows: 1964 Data size: 62851 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1964 Data size: 62851 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Vectorized execution: true + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out index 7f51455..928bc82 100644 --- ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out @@ -6,9 +6,6 @@ POSTHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG( FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME alltypesorc) t1) (TOK_TABREF (TOK_TABNAME alltypesorc) t2) (= (. (TOK_TABLE_OR_COL t1) cint) (. (TOK_TABLE_OR_COL t2) cint)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL t1) cint))) (TOK_SELEXPR (TOK_FUNCTION MAX (. (TOK_TABLE_OR_COL t2) cint))) (TOK_SELEXPR (TOK_FUNCTION MIN (. (TOK_TABLE_OR_COL t1) cint))) (TOK_SELEXPR (TOK_FUNCTION AVG (+ (. (TOK_TABLE_OR_COL t1) cint) (. (TOK_TABLE_OR_COL t2) cint))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -17,39 +14,26 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - t1 + Map Operator Tree: TableScan - alias: t1 + alias: t2 + Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: cint - type: int + key expressions: cint (type: int) sort order: + - Map-reduce partition columns: - expr: cint - type: int - tag: 0 - value expressions: - expr: cint - type: int - Vectorized execution: true - t2 + Map-reduce partition columns: cint (type: int) + Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + value expressions: cint (type: int) TableScan - alias: t2 + alias: t1 + Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: - expr: cint - type: int + key expressions: cint (type: int) sort order: + - Map-reduce partition columns: - expr: cint - type: int - tag: 1 - value expressions: - expr: cint - type: int - Vectorized execution: true + Map-reduce partition columns: cint (type: int) + Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + value expressions: cint (type: int) + Execution mode: vectorized Reduce Operator Tree: Join Operator condition map: @@ -57,27 +41,19 @@ STAGE PLANS: condition expressions: 0 {VALUE._col2} 1 {VALUE._col2} - handleSkewJoin: false outputColumnNames: _col2, _col16 + Statistics: Num rows: 103739 Data size: 414960 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col2 - type: int - expr: _col16 - type: int + expressions: _col2 (type: int), _col16 (type: int) outputColumnNames: _col2, _col16 + Statistics: Num rows: 103739 Data size: 414960 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: - expr: count(_col2) - expr: max(_col16) - expr: min(_col2) - expr: avg((_col2 + _col16)) - bucketGroup: false + aggregations: count(_col2), max(_col16), min(_col2), avg((_col2 + _col16)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -85,45 +61,25 @@ STAGE PLANS: Stage: Stage-2 Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### + Map Operator Tree: TableScan Reduce Output Operator sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: int - expr: _col2 - type: int - expr: _col3 - type: struct + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: - expr: count(VALUE._col0) - expr: max(VALUE._col1) - expr: min(VALUE._col2) - expr: avg(VALUE._col3) - bucketGroup: false + aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: _col0 - type: bigint - expr: _col1 - type: int - expr: _col2 - type: int - expr: _col3 - type: double + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/vectorized_string_funcs.q.out ql/src/test/results/clientpositive/vectorized_string_funcs.q.out index 325f11d..3baf6fe 100644 --- ql/src/test/results/clientpositive/vectorized_string_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_string_funcs.q.out @@ -46,9 +46,6 @@ where cbigint % 237 = 0 and length(substr(cstring1, 1, 2)) <= 2 and cstring1 like '%' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (TOK_TABLE_OR_COL cstring1) 1 2)) (TOK_SELEXPR (TOK_FUNCTION substr (TOK_TABLE_OR_COL cstring1) 2)) (TOK_SELEXPR (TOK_FUNCTION lower (TOK_TABLE_OR_COL cstring1))) (TOK_SELEXPR (TOK_FUNCTION upper (TOK_TABLE_OR_COL cstring1))) (TOK_SELEXPR (TOK_FUNCTION ucase (TOK_TABLE_OR_COL cstring1))) (TOK_SELEXPR (TOK_FUNCTION length (TOK_TABLE_OR_COL cstring1))) (TOK_SELEXPR (TOK_FUNCTION trim (TOK_TABLE_OR_COL cstring1))) (TOK_SELEXPR (TOK_FUNCTION ltrim (TOK_TABLE_OR_COL cstring1))) (TOK_SELEXPR (TOK_FUNCTION rtrim (TOK_TABLE_OR_COL cstring1))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_TABLE_OR_COL cstring1) (TOK_TABLE_OR_COL cstring2))) (TOK_SELEXPR (TOK_FUNCTION concat '>' (TOK_TABLE_OR_COL cstring1))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_TABLE_OR_COL cstring1) '<')) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (TOK_TABLE_OR_COL cstring1) 1 2) (TOK_FUNCTION substr (TOK_TABLE_OR_COL cstring2) 1 2)))) (TOK_WHERE (and (and (= (% (TOK_TABLE_OR_COL cbigint) 237) 0) (<= (TOK_FUNCTION length (TOK_FUNCTION substr (TOK_TABLE_OR_COL cstring1) 1 2)) 2)) (like (TOK_TABLE_OR_COL cstring1) '%'))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -56,53 +53,25 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc + Map Operator Tree: TableScan alias: alltypesorc + Statistics: Num rows: 1813 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: - expr: ((((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2)) and (cstring1 like '%')) - type: boolean - Vectorized execution: true + predicate: ((((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2)) and (cstring1 like '%')) (type: boolean) + Statistics: Num rows: 151 Data size: 31419 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: substr(cstring1, 1, 2) - type: string - expr: substr(cstring1, 2) - type: string - expr: lower(cstring1) - type: string - expr: upper(cstring1) - type: string - expr: upper(cstring1) - type: string - expr: length(cstring1) - type: int - expr: trim(cstring1) - type: string - expr: ltrim(cstring1) - type: string - expr: rtrim(cstring1) - type: string - expr: concat(cstring1, cstring2) - type: string - expr: concat('>', cstring1) - type: string - expr: concat(cstring1, '<') - type: string - expr: concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) - type: string + expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Vectorized execution: true + Statistics: Num rows: 151 Data size: 31419 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 151 Data size: 31419 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Vectorized execution: true + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out index 7a22165..1980eb6 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out @@ -99,9 +99,6 @@ POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)allty POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc_string))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION to_unix_timestamp (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION year (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION month (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION day (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION dayofmonth (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION weekofyear (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION hour (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION minute (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION second (TOK_TABLE_OR_COL ctimestamp1)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -109,40 +106,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc_string + Map Operator Tree: TableScan alias: alltypesorc_string + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: to_unix_timestamp(ctimestamp1) - type: bigint - expr: year(ctimestamp1) - type: int - expr: month(ctimestamp1) - type: int - expr: day(ctimestamp1) - type: int - expr: dayofmonth(ctimestamp1) - type: int - expr: weekofyear(ctimestamp1) - type: int - expr: hour(ctimestamp1) - type: int - expr: minute(ctimestamp1) - type: int - expr: second(ctimestamp1) - type: int + expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Vectorized execution: true + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Vectorized execution: true + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -250,9 +229,6 @@ POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)allty POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc_string))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION to_unix_timestamp (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION year (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION month (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION day (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION dayofmonth (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION weekofyear (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION hour (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION minute (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION second (TOK_TABLE_OR_COL stimestamp1)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -260,40 +236,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc_string + Map Operator Tree: TableScan alias: alltypesorc_string + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: to_unix_timestamp(stimestamp1) - type: bigint - expr: year(stimestamp1) - type: int - expr: month(stimestamp1) - type: int - expr: day(stimestamp1) - type: int - expr: dayofmonth(stimestamp1) - type: int - expr: weekofyear(stimestamp1) - type: int - expr: hour(stimestamp1) - type: int - expr: minute(stimestamp1) - type: int - expr: second(stimestamp1) - type: int + expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Vectorized execution: true + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Vectorized execution: true + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -401,9 +359,6 @@ POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)allty POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc_string))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (= (TOK_FUNCTION to_unix_timestamp (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION to_unix_timestamp (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION year (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION year (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION month (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION month (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION day (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION day (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION dayofmonth (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION dayofmonth (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION weekofyear (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION weekofyear (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION hour (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION hour (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION minute (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION minute (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION second (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION second (TOK_TABLE_OR_COL stimestamp1))))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -411,40 +366,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc_string + Map Operator Tree: TableScan alias: alltypesorc_string + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) - type: boolean - expr: (year(ctimestamp1) = year(stimestamp1)) - type: boolean - expr: (month(ctimestamp1) = month(stimestamp1)) - type: boolean - expr: (day(ctimestamp1) = day(stimestamp1)) - type: boolean - expr: (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) - type: boolean - expr: (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) - type: boolean - expr: (hour(ctimestamp1) = hour(stimestamp1)) - type: boolean - expr: (minute(ctimestamp1) = minute(stimestamp1)) - type: boolean - expr: (second(ctimestamp1) = second(stimestamp1)) - type: boolean + expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) (type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Vectorized execution: true + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Vectorized execution: true + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -556,9 +493,6 @@ POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)allty POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc_wrong))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION to_unix_timestamp (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION year (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION month (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION day (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION dayofmonth (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION weekofyear (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION hour (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION minute (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION second (TOK_TABLE_OR_COL stimestamp1)))))) - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage @@ -566,40 +500,22 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Map Reduce - Alias -> Map Operator Tree: - alltypesorc_wrong + Map Operator Tree: TableScan alias: alltypesorc_wrong + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: - expr: to_unix_timestamp(stimestamp1) - type: bigint - expr: year(stimestamp1) - type: int - expr: month(stimestamp1) - type: int - expr: day(stimestamp1) - type: int - expr: dayofmonth(stimestamp1) - type: int - expr: weekofyear(stimestamp1) - type: int - expr: hour(stimestamp1) - type: int - expr: minute(stimestamp1) - type: int - expr: second(stimestamp1) - type: int + expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Vectorized execution: true + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Vectorized execution: true + Execution mode: vectorized Stage: Stage-0 Fetch Operator